# load pretty jupyter's magics
%load_ext pretty_jupyter
Module 1¶
#import ipynbname
import urllib.request
from IPython.display import Image, display
#notebook_name = ipynbname.name()
import os
# Folder that holds the course material on the author's machine.
MyModule = 'C:/Users/Gamaliel/Documents/G/ADD/IBM_DS/Python4DS/Mine/'
whichDir = MyModule
# Only change directory when the folder exists, so the notebook still runs
# on machines that do not have this personal path.
if os.path.isdir(whichDir):
    os.chdir(whichDir)
else:
    print(f"Directory not found, staying in {os.getcwd()}: {whichDir}")
#notebook_name
Data types¶
- Words
- Strings
- 'A'
- Numbers -> integers
- int('1')
- Strings
- Numbers
- int
- positive or negative
- float
- between integers (0.1)
- boolean
- True (T,1) or False (F,0)
- bool(1) bool(0)
- True (T,1) or False (F,0)
- int
# Exchange formats
print(str(1.2))
print(str(1))
print(int(True))
print(bool(1))
print(float(True))
1.2 1 1 True 1.0
import sys
sys.float_info
sys.float_info(max=1.7976931348623157e+308, max_exp=1024, max_10_exp=308, min=2.2250738585072014e-308, min_exp=-1021, min_10_exp=-307, dig=15, mant_dig=53, epsilon=2.220446049250313e-16, radix=2, rounds=1)
Integers (int)¶
int(3.99)
3
Float¶
float(3)
3.0
Boolean¶
print(True)
print(False)
print(bool(1))
print(bool(0))
bool(int('1'))
True False True False
True
String¶
## String operations
name="Michael Jackson "
Slicing¶
print('name[::2]:',name[::2])
name[::2]: McalJcsn
Stride¶
print('name[0:7:2]:',name[0:7:2])
name[0:7:2]: Mcal
Concatenation¶
Statement=name+"is the best"
Statement
3*Statement
'Michael Jackson is the bestMichael Jackson is the bestMichael Jackson is the best'
Escape sequences¶
#print(len(name))
# A backslash (\) introduces an escape sequence.
# Escape sequences represent characters that are difficult to type directly.
print("1: Michael Jackson is the best")
print("2: Michael Jackson \n is the best")
print("3: \t Michael Jackson \n is the best")
print("4: Michael Jackson \\ is the best")
# \n: newline; \t: tab; \\: a literal backslash
# A raw string (r"...") leaves the backslash untouched.
print(r"5: Michael Jackson \ is the best")
# split() with no argument splits on any run of whitespace.
split_string=Statement.split()
split_string
1: Michael Jackson is the best 2: Michael Jackson /n is the best 3: /t Michael Jackson /n is the best 4: Michael Jackson // is the best 5: Michael Jackson / is the best
['Michael', 'Jackson', 'is', 'the', 'best']
String operations¶
# Sequence methods
# String methods
A="Thriller is the sixth studio audio";
B=A.upper()
print(A)
print(B)
C=name.replace('Michael','Janet')
print(C)
print(name.find('el'))
print(name.find('Jack'))
# output is -1 if not found
print(name.find('jack')) # case-sensitive
Thriller is the sixth studio audio THRILLER IS THE SIXTH STUDIO AUDIO Janet Jackson 5 8 -1
String interpolation¶
Introduced in Python 3.6, f-strings are a new way to format strings in Python. They are prefixed with 'f' and use curly braces {} to enclose the variables that will be formatted. For example:
name = "John"
age = 30
print(f"My name is {name} and I am {age} years old.")
My name is John and I am 30 years old.
name = "John"
age = 50
print("My name is {} and I am {} years old.".format(name, age))
My name is John and I am 50 years old.
In the code below:
- "My name is %s and I am %d years old.": This is a string that includes format specifiers:
- %s: This is a placeholder for a string.
- %d: This is a placeholder for an integer.
- % (name, age): This is a tuple containing the variables name and age.
- The values of these variables will replace the placeholders in the string.
name = "Johnathan"
age = 30
print("My name is %s and I am %d years old." % (name, age))
My name is Johnathan and I am 30 years old.
x = 10
y = 20
print(f"The sum of x and y is {x+y}.")
The sum of x and y is 30.
Raw string (r' ')¶
# In a regular string the backslash starts an escape sequence, so "\n" becomes
# a newline and "\f" a form feed instead of staying part of the path.
regular_string = "C:\new_folder\file.txt"
print("Regular String:", regular_string)
# The r prefix keeps every backslash literal, which is what a Windows path needs.
raw_string = r"C:\new_folder\file.txt"
print("Raw String:", raw_string)
Regular String: C:/new_folder/file.txt Raw String: C:/new_folder/file.txt
RegEx¶
import re
Momstring = "The BodyGuard is the best album"
# Define the pattern to search for
pattern = r"Body"
# re.search() returns a match object for the first occurrence, or None
result = re.search(pattern, Momstring)
# A match object is truthy and None is falsy, so the result can be tested directly
if result:
    print("Match found!")
else:
    print("Match not found.")
Match found!
Exercises from PY0101EN-1-2-Strings.ipynb¶
Regular expressions (RegEx) are patterns used to match and manipulate strings of text. There are several special sequences in RegEx that can be used to match specific characters or patterns.
| Special Sequence | Meaning | Example |
|---|---|---|
| \d | Matches any digit character (0-9) | "123" matches "\d\d\d" |
| \D | Matches any non-digit character | "hello" matches "\D\D\D\D\D" |
| \w | Matches any word character (a-z, A-Z, 0-9, and _) | "hello_world" matches "\w\w\w\w\w\w\w\w\w\w\w" |
| \W | Matches any non-word character | "@#$%" matches "\W\W\W\W" |
| \s | Matches any whitespace character (space, tab, newline, etc.) | "hello world" matches "\w\w\w\w\w\s\w\w\w\w\w" |
| \S | Matches any non-whitespace character | "hello_world" matches "\S\S\S\S\S\S\S\S\S\S\S" |
| \b | Matches the boundary between a word character and a non-word character | "cat" matches "\bcat\b" in "The cat sat on the mat" |
| \B | Matches any position that is not a word boundary | "cat" matches "\Bcat\B" in "concatenate" but not in "The cat sat on the mat" |
Taken from: Python for DS > Module 1> Hands-On Lab: String operations> PY0101EN-1-2-Strings.ipynb
Special Sequence Examples:
A simple example of using the \d special sequence in a regular expression pattern with Python code:
# \d matches a single digit, so ten of them match ten consecutive digits
pattern = r"\d\d\d\d\d\d\d\d\d\d"
string = "The digits are 1234567890"
# re.search() returns a match object for the first match, or None
match = re.search(pattern, string)
print('string:',string)
#print('match : ', match)
if match:
    # group() returns the text that the pattern matched
    print("match.goup():", match.group())
else:
    print("No match")
string: The digits are 1234567890 No match
The match.group() method is used in Python's re module to retrieve the part of the string where the regular expression pattern matched.
## The findall() function finds all occurrences of a specified pattern within a string.
pattern = r"\w"   # Matches any word character (letters, digits, underscore)
pattern2 = r"\W"  # Matches any non-word character
string = "Hello, world!"
# findall() returns every non-overlapping match as a list of strings
matches = re.findall(pattern, string)
matches2 = re.findall(pattern2, string)
print("Matches:", matches)
print("Matches2:", matches2)
Matches: [] Matches2: []
s2 = "The BodyGuard is the best album of 'Whitney Houston'."
# Use the split function to split the string on every whitespace character (\s)
split_array = re.split(r"\s", s2)
# The split_array contains all the substrings, split by whitespace characters
print(split_array)
# Define the regular expression pattern to search for
pattern = r"Whitney Houston"
# Define the replacement string
replacement = "legend"
# Use the sub function to replace the pattern with the replacement string;
# re.IGNORECASE makes the pattern match regardless of letter case
new_string = re.sub(pattern, replacement, s2, flags=re.IGNORECASE)
# The new_string contains the original string with the pattern replaced by the replacement string
print(new_string)
["The BodyGuard is the best album of 'Whitney Houston'."] The BodyGuard is the best album of 'legend'.
# Two digit characters stored as strings
a, b = "1", "2"
print('values:', [a, b])
# "+" applied to strings concatenates them
c = a + b
print('String addition:', c)
# Convert to int first to add the values numerically
d = int(a) + int(b)
print('Int addition:', d)
values: ['1', '2'] String addition: 12 Int addition: 3
Module 1 Summary: Python Basics¶
Python can distinguish among data types such as integers, floats, strings, and Booleans.
Integers are whole numbers that can be positive or negative.
Floats include integers as well as decimal numbers between the integers.
You can convert integers to floats using typecasting and vice-versa.
You can convert integers and floats to strings.
You can convert an integer or float value to True (1) or False (0).
Expressions in Python are a combination of values and operations used to produce a single result.
Expressions perform mathematical operations such as addition, subtraction, multiplication, and so on.
We can use // to perform integer division, which results in an integer value by discarding the fractional part.
Python follows the order of operations (BODMAS) to perform operations with multiple expressions.
Variables store and manipulate data, allowing you to access and modify values throughout your code
The assignment operator "=" assigns a value to a variable.
":" denotes the value of the variable within the code.
Assigning another value to the same variable overrides the previous value of that variable.
You can perform mathematical operations on variables using the same or different variables.
Modifying the value of one variable will affect other variables only if they reference the same mutable object.
Python string operations involve manipulating text data using tasks such as indexing, concatenation, slicing, and formatting.
A string is usually written within double quotes or single quotes, including letters, white space, digits, or special characters.
A string attaches to another variable and is an ordered sequence of characters.
Characters in a string identify their index numbers, which can be positive or negative.
We use strings as a sequence to perform sequence operations.
You can input a stride value to perform slicing while operating on a string.
Operations like combining, concatenating, and replicating produce new strings, while finding the length of a string returns a number.
You cannot modify an existing string; they are immutable.
You can use escape sequences with a backslash (\) to change the layout of a string. (For example, \n for a new line, \t for a tab, and \\ for a backslash, etc.)
In Python, you perform tasks such as searching, modifying, and formatting text data with its pre-built string methods functions.
You apply a method to a string to change its value, resulting in another string.
You can perform actions such as changing the case of characters in a string, replacing items in a string, finding items in a string, and so on using pre-built string methods.
Module 2¶
Data types¶
Tuples¶
- Ordered sequences
- Written as comma-separated values within parentheses
In Python, there are different data types: String, Integer, and Float. These data types can all be contained in a tuple as follows:

Now, let us create your first tuple with string, integer and float.
# Create your first tuple
tuple1 = ("disco",10,1.2 )
tuple1
('disco', 10, 1.2)
The type of variable is a tuple.
# Print the type of the tuple you created
type(tuple1)
tuple
Indexing
Each element of a tuple can be accessed via an index. The following table represents the relationship between the index and the items in the tuple. Each element can be obtained by the name of the tuple followed by a square bracket with the index number:

We can print out each value in the tuple:
# Print the variable on each index
print(tuple1[0])
print(tuple1[1])
print(tuple1[2])
disco 10 1.2
We can print out the type of each value in the tuple:
# Print the type of value on each index
print(type(tuple1[0]))
print(type(tuple1[1]))
print(type(tuple1[2]))
<class 'str'> <class 'int'> <class 'float'>
We can also use negative indexing. We use the same table above with corresponding negative values:

We can obtain the last element as follows (this time we will not use the print statement to display the values):
# Use negative index to get the value of the last element
tuple1[-1]
1.2
We can display the next two elements as follows:
# Use negative index to get the value of the second last element
tuple1[-2]
10
# Use negative index to get the value of the third last element
tuple1[-3]
'disco'
Concatenate Tuples
We can concatenate or combine tuples by using the + sign:
# Concatenate two tuples
tuple2 = tuple1 + ("hard rock", 10)
tuple2
('disco', 10, 1.2, 'hard rock', 10)
We can slice tuples obtaining multiple values as demonstrated by the figure below:

Slicing
We can slice tuples, obtaining new tuples with the corresponding elements:
# Slice from index 0 to index 2
tuple2[0:3]
('disco', 10, 1.2)
We can obtain the last two elements of the tuple:
# Slice from index 3 to index 4
tuple2[3:5]
('hard rock', 10)
We can obtain the length of a tuple using the length command:
# Get the length of tuple
len(tuple2)
5
This figure shows the number of elements:

Sorting
Consider the following tuple:
# A sample tuple
Ratings = (0, 9, 6, 5, 10, 8, 9, 6, 2)
We can sort the values in a tuple and save them to a new variable; note that sorted() returns a list, not a tuple:
# Sort the tuple
RatingsSorted = sorted(Ratings)
RatingsSorted
[0, 2, 5, 6, 6, 8, 9, 9, 10]
Nested Tuple
A tuple can contain another tuple as well as other more complex data types. This process is called 'nesting'. Consider the following tuple with several elements:
# Create a nest tuple
NestedT =(1, 2, ("pop", "rock") ,(3,4),("disco",(1,2)))
Each element in the tuple, including other tuples, can be obtained via an index as shown in the figure:

# Print element on each index
print("Element 0 of Tuple: ", NestedT[0])
print("Element 1 of Tuple: ", NestedT[1])
print("Element 2 of Tuple: ", NestedT[2])
print("Element 3 of Tuple: ", NestedT[3])
print("Element 4 of Tuple: ", NestedT[4])
Element 0 of Tuple: 1
Element 1 of Tuple: 2
Element 2 of Tuple: ('pop', 'rock')
Element 3 of Tuple: (3, 4)
Element 4 of Tuple: ('disco', (1, 2))
We can use the second index to access other tuples as demonstrated in the figure:

We can access the nested tuples:
# Print element on each index, including nest indexes
print("Element 2, 0 of Tuple: ", NestedT[2][0])
print("Element 2, 1 of Tuple: ", NestedT[2][1])
print("Element 3, 0 of Tuple: ", NestedT[3][0])
print("Element 3, 1 of Tuple: ", NestedT[3][1])
print("Element 4, 0 of Tuple: ", NestedT[4][0])
print("Element 4, 1 of Tuple: ", NestedT[4][1])
Element 2, 0 of Tuple: pop Element 2, 1 of Tuple: rock Element 3, 0 of Tuple: 3 Element 3, 1 of Tuple: 4 Element 4, 0 of Tuple: disco Element 4, 1 of Tuple: (1, 2)
We can access strings in the second nested tuples using a third index:
# Print the first element in the second nested tuples
NestedT[2][1][0]
'r'
# Print the second element in the second nested tuples
NestedT[2][1][1]
'o'
We can use a tree to visualise the process. Each new index corresponds to a deeper level in the tree:

Similarly, we can access elements nested deeper in the tree with a third index:
# Print the first element in the second nested tuples
NestedT[4][1][0]
1
# Print the second element in the second nested tuples
NestedT[4][1][1]
2
The following figure shows the relationship of the tree and the element NestedT[4][1][1]:

Lists¶
- Can contain all data formats
- Are mutable
##
##
##Taken from Notebook
List Content
Lists can contain strings, floats, and integers. We can nest other lists, and we can also nest tuples and other data structures. The same indexing conventions apply for nesting:
# Sample List
["Michael Jackson", 10.1, 1982, [1, 2], ("A", 1)]
['Michael Jackson', 10.1, 1982, [1, 2], ('A', 1)]
List Operations
We can also perform slicing in lists. For example, if we want the last two elements, we use the following command:
# Sample List
L = ["Michael Jackson", 10.1,1982,"MJ",1]
L
['Michael Jackson', 10.1, 1982, 'MJ', 1]

# List slicing
L[3:5]
['MJ', 1]
We can use the method extend to add new elements to the list:
# Use extend to add elements to list
L = [ "Michael Jackson", 10.2]
L.extend(['pop', 10])
L
['Michael Jackson', 10.2, 'pop', 10]
Another similar method is append. If we apply append instead of extend, we add one element to the list:
# Use append to add elements to list
L = [ "Michael Jackson", 10.2]
L.append(['pop', 10])
L
['Michael Jackson', 10.2, ['pop', 10]]
Each time we apply a method, the list changes. If we apply extend we add two new elements to the list. The list L is then modified by adding two new elements:
# Use extend to add elements to list
L = [ "Michael Jackson", 10.2]
L.extend(['pop', 10])
L
['Michael Jackson', 10.2, 'pop', 10]
If we append the list ['a','b'] we have one new element consisting of a nested list:
# Use append to add elements to list
L.append(['a','b'])
L
['Michael Jackson', 10.2, 'pop', 10, ['a', 'b']]
As lists are mutable, we can change them. For example, we can change the first element as follows:
# Change the element based on the index
A = ["disco", 10, 1.2]
print('Before change:', A)
A[0] = 'hard rock'
print('After change:', A)
Before change: ['disco', 10, 1.2] After change: ['hard rock', 10, 1.2]
We can also delete an element of a list using the del command:
# Delete the element based on the index
print('Before change:', A)
del(A[0])
print('After change:', A)
Before change: ['hard rock', 10, 1.2] After change: [10, 1.2]
We can convert a string to a list using split. For example, the method split translates every group of characters separated by a space into an element in a list:
# Split the string, default is by space
'hard rock'.split()
['hard', 'rock']
We can use the split function to separate strings on a specific character which we call a delimiter. We pass the character we would like to split on into the argument, which in this case is a comma. The result is a list, and each element corresponds to a set of characters that have been separated by a comma:
# Split the string by comma
'A,B,C,D'.split(',')
['A', 'B', 'C', 'D']
Copy and Clone List
When we set one variable B equal to A, both A and B are referencing the same list in memory:
# Copy (copy by reference) the list A
A = ["hard rock", 10, 1.2]
B = A
print('A:', A)
print('B:', B)
A: ['hard rock', 10, 1.2] B: ['hard rock', 10, 1.2]

Initially, the value of the first element in B is set as "hard rock". If we change the first element in A to "banana", we get an unexpected side effect. As A and B are referencing the same list, if we change list A, then list B also changes. If we check the first element of B we get "banana" instead of "hard rock":
# Examine the copy by reference
print('B[0]:', B[0])
A[0] = "banana"
print('B[0]:', B[0])
B[0]: hard rock B[0]: banana
This is demonstrated in the following figure:

You can clone list A by using the following syntax:
# Clone (clone by value) the list A
B = A[:]
B
['banana', 10, 1.2]
Variable B references a new copy or clone of the original list. This is demonstrated in the following figure:

Now if you change A, B will not change:
print('B[0]:', B[0])
A[0] = "hard rock"
print('B[0]:', B[0])
B[0]: banana B[0]: banana
my_list = [1, 2, 3, 4, 5]
my_list.insert(2, 6)
print(my_list)
[1, 2, 6, 3, 4, 5]
HTMLs from Course
Dictionaries
What are Dictionaries?¶
A dictionary consists of keys and values. It is helpful to compare a dictionary to a list. Instead of being indexed numerically like a list, dictionaries have keys. These keys are the keys that are used to access values within a dictionary.
A good example of a dictionary is looking up a person's details using their social security number.
Here the social security number which is a unique number will be the key and the details of the people will be the values associated with it.

Create a Dictionary and access the elements¶
An example of a Dictionary Dict:
Here we are creating a dictionary named Dict with the following details
- Keys are key1, key2, key3, key4, key5, and the tuple (0, 1).
- Values are 1, "2", [3, 3, 3], (4, 4, 4), 5, and 6, corresponding to the keys
# Create the dictionary
Dict = {"key1": 1, "key2": "2", "key3": [3, 3, 3], "key4": (4, 4, 4), ('key5'): 5, (0, 1): 6}
Dict
{'key1': 1,
'key2': '2',
'key3': [3, 3, 3],
'key4': (4, 4, 4),
'key5': 5,
(0, 1): 6}
The keys can be strings:
# Access to the value by the key
Dict["key1"]
1
Keys can also be any immutable object such as a tuple:
# Access to the value by the key
Dict[(0, 1)]
6
Each key is separated from its value by a colon ":". Commas separate the items, and the whole dictionary is enclosed in curly braces. An empty dictionary without any items is written with just two curly braces, like this "{}".
# Create a sample dictionary
release_year_dict = {"Thriller": "1982", "Back in Black": "1980",
"The Dark Side of the Moon": "1973", "The Bodyguard": "1992",
"Bat Out of Hell": "1977", "Their Greatest Hits (1971-1975)": "1976",
"Saturday Night Fever": "1977", "Rumours": "1977"}
release_year_dict
{'Thriller': '1982',
'Back in Black': '1980',
'The Dark Side of the Moon': '1973',
'The Bodyguard': '1992',
'Bat Out of Hell': '1977',
'Their Greatest Hits (1971-1975)': '1976',
'Saturday Night Fever': '1977',
'Rumours': '1977'}
In summary, like a list, a dictionary holds a sequence of elements. Each element is represented by a key and its corresponding value. Dictionaries are created with two curly braces containing keys and values separated by a colon. For every key, there can only be one single value, however, multiple keys can hold the same value. Keys can only be strings, numbers, or tuples, but values can be any data type.
It is helpful to visualize the dictionary as a table, as in the following image. The first column represents the keys, the second column represents the values.

Keys¶
You can retrieve the values based on the names:
# Get value by keys
release_year_dict['Thriller']
'1982'
This corresponds to:

Similarly for The Bodyguard
# Get value by key
release_year_dict['The Bodyguard']
'1992'

Now let us retrieve the keys of the dictionary using the method keys():
# Get all the keys in dictionary
release_year_dict.keys()
dict_keys(['Thriller', 'Back in Black', 'The Dark Side of the Moon', 'The Bodyguard', 'Bat Out of Hell', 'Their Greatest Hits (1971-1975)', 'Saturday Night Fever', 'Rumours'])
You can retrieve the values using the method values():
# Get all the values in dictionary
release_year_dict.values()
dict_values(['1982', '1980', '1973', '1992', '1977', '1976', '1977', '1977'])
We can add an entry:
# Append value with key into dictionary
release_year_dict['Graduation'] = '2007'
release_year_dict
{'Thriller': '1982',
'Back in Black': '1980',
'The Dark Side of the Moon': '1973',
'The Bodyguard': '1992',
'Bat Out of Hell': '1977',
'Their Greatest Hits (1971-1975)': '1976',
'Saturday Night Fever': '1977',
'Rumours': '1977',
'Graduation': '2007'}
We can delete an entry:
# Delete entries by key
del(release_year_dict['Thriller'])
del(release_year_dict['Graduation'])
release_year_dict
{'Back in Black': '1980',
'The Dark Side of the Moon': '1973',
'The Bodyguard': '1992',
'Bat Out of Hell': '1977',
'Their Greatest Hits (1971-1975)': '1976',
'Saturday Night Fever': '1977',
'Rumours': '1977'}
We can verify if an element is in the dictionary:
# Verify the key is in the dictionary
'The Bodyguard' in release_year_dict
True
Sets¶
Set Content
A set is a unique collection of objects in Python. You can denote a set with a pair of curly brackets {}. Python will automatically remove duplicate items:
# Create a set
set1 = {"pop", "rock", "soul", "hard rock", "rock", "R&B", "rock", "disco"}
set1
{'R&B', 'disco', 'hard rock', 'pop', 'rock', 'soul'}
The process of mapping is illustrated in the figure:

You can also create a set from a list as follows:
# Convert list to set
album_list = [ "Michael Jackson", "Thriller", 1982, "00:42:19",
"Pop, Rock, R&B", 46.0, 65, "30-Nov-82", None, 10.0]
album_set = set(album_list)
album_set
{'00:42:19',
10.0,
1982,
'30-Nov-82',
46.0,
65,
'Michael Jackson',
None,
'Pop, Rock, R&B',
'Thriller'}
Now let us create a set of genres:
# Build the genre set with a set literal; the repeated "pop" collapses to one entry
music_genres = {
    "pop", "pop", "rock", "folk rock", "hard rock", "soul",
    "progressive rock", "soft rock", "R&B", "disco",
}
music_genres
{'R&B',
'disco',
'folk rock',
'hard rock',
'pop',
'progressive rock',
'rock',
'soft rock',
'soul'}
Set Operations
Let us go over set operations, as these can be used to change the set. Consider the set A:
# Sample set, written as a set literal
A = {"Thriller", "Back in Black", "AC/DC"}
A
{'AC/DC', 'Back in Black', 'Thriller'}
We can add an element to a set using the add() method:
# Add element to set
A.add("NSYNC")
A
{'AC/DC', 'Back in Black', 'NSYNC', 'Thriller'}
If we add the same element twice, nothing will happen as there can be no duplicates in a set:
# Try to add duplicate element to the set
A.add("NSYNC")
A
{'AC/DC', 'Back in Black', 'NSYNC', 'Thriller'}
We can remove an item from a set using the remove method:
# Remove the element from set
A.remove("NSYNC")
A
{'AC/DC', 'Back in Black', 'Thriller'}
We can verify if an element is in the set using the in command:
# Verify if the element is in the set
"AC/DC" in A
True
Sets Logic Operations
Remember that with sets you can check the difference between sets, as well as the symmetric difference, intersection, and union:
Consider the following two sets:
# Two sample sets that share "AC/DC" and "Back in Black"
album_set1 = {"Thriller", 'AC/DC', 'Back in Black'}
album_set2 = {"AC/DC", "Back in Black", "The Dark Side of the Moon"}

# Show both sets together as a tuple
album_set1, album_set2
({'AC/DC', 'Back in Black', 'Thriller'},
{'AC/DC', 'Back in Black', 'The Dark Side of the Moon'})
As both sets contain AC/DC and Back in Black we represent these common elements with the intersection of two circles.

You can find the intersection of two sets as follows, using &:
# Find the intersections
intersection = album_set1 & album_set2
intersection
{'AC/DC', 'Back in Black'}
You can find all the elements that are only contained in album_set1 using the difference method:
# Find the difference in set1 but not set2
album_set1.difference(album_set2)
{'Thriller'}
You only need to consider elements in album_set1; all the elements in album_set2, including the intersection, are not included.

The elements in album_set2 but not in album_set1 are given by:
album_set2.difference(album_set1)
{'The Dark Side of the Moon'}

You can also find the intersection of album_set1 and album_set2, using the intersection method:
# Use intersection method to find the intersection of album_set1 and album_set2
album_set1.intersection(album_set2)
{'AC/DC', 'Back in Black'}
This corresponds to the intersection of the two circles:

The union corresponds to all the elements in both sets, which is represented by coloring both circles:

The union is given by:
# Find the union of two sets
album_set1.union(album_set2)
{'AC/DC', 'Back in Black', 'The Dark Side of the Moon', 'Thriller'}
And you can check if a set is a superset or subset of another set, respectively, like this:
# Check if superset
set(album_set1).issuperset(album_set2)
False
# Check if subset
set(album_set2).issubset(album_set1)
False
Here is an example where issubset() and issuperset() return true:
# Check if subset
set({"Back in Black", "AC/DC"}).issubset(album_set1)
True
# Check if superset
album_set1.issuperset({"Back in Black", "AC/DC"})
True
Module 2 Summary: Python Data Structures
In Python, we often use tuples to group related data together. Tuples refer to ordered and immutable collections of elements.
Tuples are usually written as comma-separated elements in parentheses "()".
You can include strings, integers, and floats in tuples and access them using both positive and negative indices.
You can perform operations such as combining, concatenating, and slicing on tuples.
Tuples are immutable, so you need to create a new tuple to manipulate it.
Tuples, termed nesting, can include other tuples of complex data types.
You can access elements in a nested tuple through indexing.
Lists in Python contain ordered collections of items that can hold elements of different types and are mutable, allowing for versatile data storage and manipulation.
A list is an ordered sequence, represented with square brackets "[]".
Lists are mutable, which distinguishes them from tuples.
A list can contain strings, integers, and floats; you can nest lists within it.
You can access each element in a list using both positive and negative indexing.
Concatenating or appending a list will result in the modification of the same list.
You can perform operations such as adding, deleting, splitting, and so forth on a list.
You can separate elements in a list using delimiters.
Aliasing occurs when multiple names refer to the same object.
You can also clone a list to create another list.
Dictionaries in Python are key-value pairs that provide a flexible way to store and retrieve data based on unique keys.
Dictionaries consist of keys and values; keys must be immutable (for example strings, numbers, or tuples), while values can be of any type.
You denote dictionaries using curly brackets.
The keys necessitate immutability and uniqueness.
The values may be either immutable or mutable, and they allow duplicates.
You separate each key-value pair with a comma, and you can use color highlighting to make the key more visible.
You can assign dictionaries to a variable.
You use the key as an argument to retrieve the corresponding value.
You can make additions and deletions to dictionaries.
You can perform an operation on a dictionary to check the key, which results in a true or false output.
You can apply methods to obtain a list of keys and values in a dictionary.
Sets in Python are collections of unique elements, useful for tasks such as removing duplicates and performing set operations like union and intersection. Sets lack order.
Curly brackets "{}" are helpful for defining elements of a set.
Sets do not contain duplicate items.
A list passed through the set function generates a set containing unique elements.
You use “Set Operations” to perform actions such as adding, removing, and verifying elements in a set.
You can combine sets using the ampersand "&" operator to obtain the common elements from both sets.
You can use the Union function to combine two sets, including both the common and unique elements from both sets.
The issubset() method is used to determine whether one set is a subset of another.
Module 3
Condition Statements
Comparison Operators
Comparison operations compare some value or operand and based on a condition, produce a Boolean. When comparing two values you can use these operators:
- equal: ==
- not equal: !=
- greater than: >
- less than: <
- greater than or equal to: >=
- less than or equal to: <=
Let's assign a a value of 5. Use the equality operator denoted with two equal == signs to determine if two values are equal. The case below compares the variable a with 6.
# Condition Equal
a = 5
a == 6
False
The result is False, as 5 does not equal 6.
Consider the following inequality comparison operator: i > 5. If the value of the left operand, in this case the variable i, is greater than the value of the right operand, in this case 5, then the statement is True. Otherwise, the statement is False. If i is equal to 6, because 6 is larger than 5, the output is True.
# Greater than Sign
i = 6
i > 5
True
Set i = 2. The statement is False as 2 is not greater than 5:
# Greater than Sign
i = 2
i > 5
False
# Inequality Sign
i = 2
i != 6
True
When i equals 6 the inequality expression produces False.
# Inequality Sign
i = 6
i != 6
False
# Use Equality sign to compare the strings
"ACDC" == "Michael Jackson"
False
If we use the inequality operator, the output is going to be True as the strings are not equal.
# Use Inequality sign to compare the strings
"ACDC" != "Michael Jackson"
True
The inequality operation is also used to compare the letters/words/symbols according to the ASCII value of letters. The decimal value shown in the following table represents the order of the character:
| Char. | ASCII | Char. | ASCII | Char. | ASCII | Char. | ASCII |
|---|---|---|---|---|---|---|---|
| A | 65 | N | 78 | a | 97 | n | 110 |
| B | 66 | O | 79 | b | 98 | o | 111 |
| C | 67 | P | 80 | c | 99 | p | 112 |
| D | 68 | Q | 81 | d | 100 | q | 113 |
| E | 69 | R | 82 | e | 101 | r | 114 |
| F | 70 | S | 83 | f | 102 | s | 115 |
| G | 71 | T | 84 | g | 103 | t | 116 |
| H | 72 | U | 85 | h | 104 | u | 117 |
| I | 73 | V | 86 | i | 105 | v | 118 |
| J | 74 | W | 87 | j | 106 | w | 119 |
| K | 75 | X | 88 | k | 107 | x | 120 |
| L | 76 | Y | 89 | l | 108 | y | 121 |
| M | 77 | Z | 90 | m | 109 | z | 122 |
For example, the ASCII code for ! is 33, while the ASCII code for + is 43. Therefore + is larger than ! as 43 is greater than 33.
Similarly, from the table above we see that the value for A is 65, and the value for B is 66, therefore:
# Compare characters
'B' > 'A'
True
When there are multiple letters, the first letter takes precedence in ordering:
# Compare characters
'BA' > 'AB'
True
Note: Upper Case Letters have different ASCII code than Lower Case Letters, which means the comparison between the letters in Python is case-sensitive.
Branching
Branching allows us to run different statements for different inputs. It is helpful to think of an if statement as a locked room, if the statement is True we can enter the room and your program will run some predefined tasks, but if the statement is False the program will ignore the task.
For example, consider the blue rectangle representing an ACDC concert. If the individual is older than 18, they can enter the ACDC concert. If they are 18 or younger, they cannot enter the concert.
We can use the condition statements learned before as the conditions that need to be checked in the if statement. The syntax is as simple as if condition statement :, which contains a word if, any condition statement, and a colon at the end. Start your tasks which need to be executed under this condition in a new line with an indent. The lines of code after the colon and with an indent will only be executed when the if statement is True. The tasks will end when the line of code does not contain the indent.
In the case below, the code print("you can enter") is executed only if the variable age is greater than 18, because this line of code is indented under the if statement. However, the execution of print("move on") is not influenced by the if statement.
# If statement example
age = 19
#age = 18
# expression that can be true or false
if age > 18:
    # within an indent, we have the expression that is run if the condition is true
    print("you can enter" )
# The statements after the if statement will run regardless if the condition is true or false
print("move on")
you can enter move on
# Else statement example
age = 18
# age = 19
if age > 18:
print("you can enter" )
else:
print("go see Meat Loaf" )
print("move on")
go see Meat Loaf move on
# Elif statement example
age = 18
if age > 18:
print("you can enter" )
elif age == 18:
print("go see Pink Floyd")
else:
print("go see Meat Loaf" )
print("move on")
go see Pink Floyd move on
# Condition statement example
# Toggle between the two values to exercise both outcomes of the condition.
# album_year = 1983
album_year = 1970

if album_year > 1980:
    # Only runs when the condition above is True.
    print("Album year is greater than 1980")

# Runs regardless of the condition.
print('do something..')
do something..
# Condition statement example
album_year = 1983
#album_year = 1970
if album_year > 1980:
print("Album year is greater than 1980")
else:
print("less than 1980")
print('do something..')
Album year is greater than 1980 do something..
Logical operators
Sometimes you want to check more than one condition at once. For example, you might want to check if one condition and another condition are both True. Logical operators allow you to combine or modify conditions.
- and
- or
- not
These operators are summarized for two variables using the following truth tables:
# Condition statement example
album_year = 1980
if(album_year > 1979) and (album_year < 1990):
print ("Album year was in between 1980 and 1989")
print("")
print("Do Stuff..")
Album year was in between 1980 and 1989 Do Stuff..
# Condition statement example
album_year = 1990
if(album_year < 1980) or (album_year > 1989):
print ("Album was not made in the 1980's")
else:
print("The Album was made in the 1980's ")
Album was not made in the 1980's
# Condition statement example
album_year = 1983
if not (album_year == 1984):
print ("Album year is not 1984")
Album year is not 1984
Loops
Range
Sometimes, you might want to repeat a given operation many times. Repeated executions like this are performed by loops. We will look at two types of loops, for loops and while loops.
Before we discuss loops, let's discuss the range object. It is helpful to think of the range object as an ordered list. For now, let's look at the simplest case. If we would like to generate an object that contains elements ordered from 0 to 2, we simply use the command range(3).
For loop
The for loop enables you to execute a code block multiple times. For example, you would use this if you would like to print out every element in a list.
Let's try to use a for loop to print all the years presented in the list dates:
# For loop example
dates = [1982,1980,1973]
N = len(dates)
for i in range(N):
print(dates[i])
1982 1980 1973
# Example of for loop
for i in range(0, 2):
print(i)
0 1
In Python we can directly access the elements in the list as follows:
# Example of for loop, loop through list
for year in dates:
print(year)
1982 1980 1973
# Use for loop to change the elements in list
squares = ['red', 'yellow', 'green', 'purple', 'blue']
for i in range(0, 5):
print("Before square ", i, 'is', squares[i])
squares[i] = 'white'
print("After square ", i, 'is', squares[i])
Before square 0 is red After square 0 is white Before square 1 is yellow After square 1 is white Before square 2 is green After square 2 is white Before square 3 is purple After square 3 is white Before square 4 is blue After square 4 is white
We can access the index and the elements of a list as follows:
# Loop through the list and iterate on both index and element value
squares=['red', 'yellow', 'green', 'purple', 'blue']
for i, square in enumerate(squares):
print(i, square)
0 red 1 yellow 2 green 3 purple 4 blue
Key point of For Loop:¶
- A for loop iterates over a sequence (such as a list, string, or range) or any object that supports iteration.
- It has a predefined number of iterations based on the length of the sequence or the number of items to iterate over.
- It automatically handles the iteration and does not require maintaining a separate variable for tracking the iteration count.
- It simplifies the code by encapsulating the iteration logic within the loop itself.
- It is commonly used when you know the exact number of iterations or need to iterate over each item in a collection.
While loop
As you can see, the for loop is used for a controlled flow of repetition. However, what if we don't know when we want to stop the loop? What if we want to keep executing a code block until a certain condition is met? The while loop exists as a tool for repeated execution based on a condition. The code block will keep being executed until the given logical condition returns a False boolean value.
Here's how a while loop works:¶
- First, you specify a condition that the loop will check before each iteration (repetition) of the code block.
- If the condition is initially true, the code block is executed.
- After executing the code block, the condition is checked again.
- If the condition is still true, the code block is executed again.
- Steps 3 and 4 repeat until the condition becomes false.
- Once the condition becomes false, the loop stops, and the program continues with the next line of code after the loop.
Here's an example of a while loop that prints numbers from 1 to 5:
count = 1
while count <= 5:
print(count)
count += 1
1 2 3 4 5
In this example, the condition count <= 5 is checked before each iteration. As long as count is less than or equal to 5, the code block inside the loop is executed. After each iteration, the value of count is incremented by 1 using count += 1. Once count reaches 6, the condition becomes false, and the loop stops.
Let’s say we would like to iterate through list dates and stop at the year 1973, then print out the number of iterations. This can be done with the following block of code:
# While Loop Example
dates = [1982, 1980, 1973, 2000]

i = 0
year = dates[0]

# Walk through the list until the year 1973 is reached.
while year != 1973:
    print(year)
    i = i + 1
    if i >= len(dates):
        # 1973 never appeared: stop instead of indexing past the end
        # of the list (which would raise IndexError).
        break
    year = dates[i]

print("It took ", i, "repetitions to get out of loop.")
1982 1980 It took 2 repetitions to get out of loop.
Key point of While Loop:¶
- A while loop repeatedly executes a block of code as long as a given condition is true.
- It does not have a fixed number of iterations but continues executing until the condition becomes false.
- The condition is checked before each iteration, and if it's false initially, the code block is skipped entirely.
- The condition is typically based on a variable or expression that can change during the execution of the loop.
- It provides more flexibility in terms of controlling the loop's execution based on dynamic conditions.
Functions
A function is a reusable block of code which performs operations specified in the function. They let you break down tasks and allow you to reuse your code in different programs.
There are two types of functions :
- Pre-defined functions
- User defined functions
What is a Function?
You can define functions to provide the required functionality. Here are simple rules to define a function in Python:
- Function blocks begin with `def`, followed by the function `name` and parentheses `()`.
- There are input parameters or arguments that should be placed within these parentheses.
- You can also define parameters inside these parentheses.
- There is a body within every function that starts with a colon (`:`) and is indented.
- You can also place documentation before the body.
- The statement `return` exits a function, optionally passing back a value.
An example of a function that adds one to the parameter a, prints the result, and returns it as b:
# First function example: Add 1 to a and store as b
def add(a):
    """Return ``a + 1``, printing the input next to the result first."""
    incremented = a + 1
    print(a, "if you add one", incremented)
    return incremented
add(2)
2 if you add one 3
3
Functions work for 'str' and 'int'
# Define a function that multiplies two values
def Mult(a, b):
    """Return the product ``a * b``.

    The operands only need to support ``*``: two numbers are multiplied,
    while an int and a str give the string repeated.
    """
    product = a * b
    return product
    # Deliberately unreachable: nothing after ``return`` is executed.
    print('This is not printed')
result = Mult(12,2)
print(result)
Mult(2, "Michael Jackson ")
24
'Michael Jackson Michael Jackson '
Using if/else Statements and Loops in Functions
# Function example
def type_of_album(artist, album, year_released):
    """Print the album details and classify the album by its release year.

    Returns "Modern" when ``year_released`` is later than 1980 and
    "Oldie" otherwise (1980 itself counts as "Oldie").
    """
    print(artist, album, year_released)
    return "Modern" if year_released > 1980 else "Oldie"
x = type_of_album("Michael Jackson", "Thriller", 1980)
print(x)
Michael Jackson Thriller 1980 Oldie
Obtaining frequencies
import pandas as pd
import matplotlib.pyplot as plt
# Python Program to Count words in a String using Dictionary
def freq(string):
    """Count how often each whitespace-separated word occurs in ``string``.

    Matching is exact: words are case-sensitive and keep any attached
    punctuation (``"lamb"`` and ``"lamb,"`` are different keys).

    Parameters:
        string: the text to analyse.

    Returns:
        A plain ``dict`` mapping each word to its number of occurrences,
        with keys in order of first appearance.
    """
    from collections import Counter

    # Counter tallies every word in a single pass; calling words.count(key)
    # for each word would rescan the whole list every time (quadratic).
    return dict(Counter(string.split()))  # or string.lower().split()
#step6: Call function and pass string in it
# Word frequencies of the sample text.
Freqs = freq("Mary had a little lamb Little lamb, little lamb Mary had a little lamb. Its fleece was white as snow And everywhere that Mary went Mary went, Mary went Everywhere that Mary went The lamb was sure to go")

# Re-order the dictionary so the most frequent words come first.
val_based_rev = dict(sorted(Freqs.items(), key=lambda item: item[1], reverse=True))
ks = list(val_based_rev)
vs = list(val_based_rev.values())

# Plot count against word, with rotated tick labels so the words stay legible.
fig = plt.figure()
ax = fig.add_axes([0.1, 0.2, 0.75, 0.75])
plt.plot(ks, vs)
ax.xaxis.set_tick_params(rotation=45, labelsize=10)
plt.show()
Global variables
So far, we've been creating variables within functions, but we have not discussed variables outside the function. These are called global variables.
Let's try to see what printer1 returns:
# Example of global variable
artist = "Michael Jackson"
def printer1(artist):
    """Print a line stating that ``artist`` is an artist."""
    print(artist, "is an artist")
    # Local variable: it is created inside the function body, so it is not
    # defined at the top level of the notebook.
    internal_var1 = artist
printer1(artist)
# try running the following code
#printer1(internal_var1)
Michael Jackson is an artist
Setting default argument values in your custom functions
# Example for setting param with default value
# DefRating is global in the scope
# Threshold read (as a global) by isGoodRating at call time.
DefRating = 7


def isGoodRating(rating=4):
    """Print a verdict for ``rating`` compared with the global ``DefRating``.

    A rating below ``DefRating`` is reported as bad, anything else as
    good. ``rating`` defaults to 4 when the caller passes nothing.
    """
    verdict = (
        "this album sucks it's rating is"
        if rating < DefRating
        else "this album is good its rating is"
    )
    print(verdict, rating)
#isGoodRating()
isGoodRating(10)
this album is good its rating is 10
Collections and Functions
When the number of arguments for a function is unknown, they can all be packed into a tuple as shown:
def printAll(*args):
    """Print the number of positional arguments, then each one on its own line.

    All positional arguments are packed into the tuple ``args``.
    """
    print("No of arguments:", len(args))
    if args:
        # One print call with a newline separator emits one argument per line.
        print(*args, sep="\n")
#printAll with 3 arguments
printAll('Horsefeather','Adonis','Bone')
#printAll with 4 arguments
printAll('Sidecar','Long Island','Mudslide','Carriage')
No of arguments: 3 Horsefeather Adonis Bone No of arguments: 4 Sidecar Long Island Mudslide Carriage
Similarly, the arguments can also be packed into a dictionary as shown:
def printDictionary(**args):
    """Print every keyword argument as ``key : value``, one per line.

    All keyword arguments are packed into the dictionary ``args``.
    Values are formatted with an f-string, so non-string values such as
    numbers are printed too instead of raising ``TypeError`` as string
    concatenation would.
    """
    for key, value in args.items():
        print(f"{key} : {value}")
printDictionary(Country='Canada',Province='Ontario',City='Toronto')
Country : Canada Province : Ontario City : Toronto
Functions can be incredibly powerful and versatile. They can accept (and return) data types, objects and even other functions as arguments. Consider the example below:
def addItems(list):
    """Append "Three" and "Four" to the given list, modifying it in place.

    Nothing is returned; the caller sees the change through its own
    reference to the same list object.
    """
    # NOTE: the parameter name shadows the built-in ``list``; it is kept
    # unchanged so that existing keyword calls continue to work.
    list.extend(["Three", "Four"])
myList = ["One","Two"]
addItems(myList)
myList
['One', 'Two', 'Three', 'Four']
Exception Handling
A try except will allow you to execute code that might raise an exception and in the case of any exception or a specific one we can handle or catch the exception and execute specific code. This will allow us to continue the execution of our program even if there is an exception.
Python tries to execute the code in the try block. In this case if there is any exception raised by the code in the try block, it will be caught and the code block in the except block will be executed. After that, the code that comes after the try except will be executed.
Try Except Else and Finally Example¶
a = 1

try:
    #b = int(input("Please enter a number to divide a"))
    b = 1
    a = a/b
except ZeroDivisionError:
    # Raised by a/b when b is 0.
    print("The number you provided cant divide 1 because it is 0")
except ValueError:
    # Raised by int(...) when the input text is not a number.
    print("You did not provide a number")
except Exception:
    # Catch-all for any other error. ``Exception`` is used instead of a bare
    # ``except:`` so KeyboardInterrupt and SystemExit are not swallowed.
    print("Something went wrong")
else:
    # Runs only when the try block raised nothing.
    print("success a=",a)
finally:
    # Runs in every case, with or without an exception.
    print("Processing Complete")
success a= 1.0 Processing Complete
Classes and Objects
Creating a Class
The first step in creating a class is giving it a name. In this notebook, we will create two classes: Circle and Rectangle. We need to determine all the data that make up that class, which we call attributes. Think about this step as creating a blue print that we will use to create objects. In figure 1 we see two classes, Circle and Rectangle. Each has their attributes, which are variables. The class Circle has the attribute radius and color, while the Rectangle class has the attribute height and width. Let’s use the visual examples of these shapes before we get to the code, as this will help you get accustomed to the vocabulary.

Figure 1: Classes circle and rectangle, and each has their own attributes. The class Circle has the attribute radius and colour, the class Rectangle has the attributes height and width.
Instances of a Class: Objects and Attributes
An instance of an object is the realisation of a class, and in Figure 2 we see three instances of the class circle. We give each object a name: red circle, yellow circle, and green circle. Each object has different attributes, so let's focus on the color attribute for each object.

Figure 2: Three instances of the class Circle, or three objects of type Circle.
The colour attribute for the red Circle is the colour red, for the green Circle object the colour attribute is green, and for the yellow Circle the colour attribute is yellow.
Methods
Methods give you a way to change or interact with the object; they are functions that interact with objects. For example, let’s say we would like to increase the radius of a circle by a specified amount. We can create a method called add_radius(r) that increases the radius by r. This is shown in figure 3, where after applying the method to the "orange circle object", the radius of the object increases accordingly. The “dot” notation means to apply the method to the object, which is essentially applying a function to the information in the object.

Figure 3: Applying the method “add_radius” to the object orange circle object.
Creating a Class
Now we are going to create a class Circle, but first, we are going to import a library to draw the objects:
# Import the library
import matplotlib.pyplot as plt
%matplotlib inline
The first step in creating your own class is to use the class keyword, then the name of the class as shown in Figure 4. In this course the class parent will always be object:

Figure 4: Creating a class Circle.
The next step is a special method called a constructor, __init__, which is used to initialize the object. The inputs are data attributes. The term self refers to the object itself and gives access to all of its attributes. For example, self.color gives the value of the attribute color and self.radius gives the radius of the object. We also have the method add_radius() with the parameter r; the method adds the value of r to the attribute radius. To access the radius we use the syntax self.radius. The labeled syntax is summarized in Figure 5:

Figure 5: Labeled syntax of the object circle.
The actual object is shown below. We include the method drawCircle to display the image of a circle. We set the default radius to 3 and the default colour to blue:
# Create a class Circle
class Circle:
    """A circle described by its radius and fill colour."""

    def __init__(self, radius=3, color='blue'):
        """Store the radius (default 3) and the colour (default 'blue')."""
        self.radius = radius
        self.color = color

    def add_radius(self, r):
        """Increase the radius by ``r`` and return the new radius."""
        grown = self.radius + r
        self.radius = grown
        return grown

    def drawCircle(self):
        """Draw the circle centred at (0, 0) on the current axes and show it."""
        axes = plt.gca()
        patch = plt.Circle((0, 0), radius=self.radius, fc=self.color)
        axes.add_patch(patch)
        plt.axis('scaled')
        plt.show()
Creating an instance of a class Circle
Let’s create the object RedCircle of type Circle to do the following:
# Create an object RedCircle
RedCircle = Circle(10, 'red')
We can use the dir command to get a list of the object's methods. Many of them are default Python methods.
# Find out the methods can be used on the object RedCircle
dir(RedCircle)
['__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__getstate__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', 'add_radius', 'color', 'drawCircle', 'radius']
We can look at the data attributes of the object:
# Print the object attribute radius
RedCircle.radius
10
# Print the object attribute color
RedCircle.color
'red'
We can change the object's data attributes:
# Set the object attribute radius
RedCircle.radius = 1
RedCircle.radius
1
We can draw the object by using the method drawCircle():
# Call the method drawCircle
RedCircle.drawCircle()
We can increase the radius of the circle by applying the method add_radius(). Let's increases the radius by 2 and then by 5:
# Use method to change the object attribute radius
print('Radius of object:',RedCircle.radius)
RedCircle.add_radius(2)
print('Radius of object of after applying the method add_radius(2):',RedCircle.radius)
RedCircle.add_radius(5)
print('Radius of object of after applying the method add_radius(5):',RedCircle.radius)
Radius of object: 1 Radius of object of after applying the method add_radius(2): 3 Radius of object of after applying the method add_radius(5): 8
Let’s create a blue circle. As the default colour is blue, all we have to do is specify what the radius is:
# Create a blue circle with a given radius
BlueCircle = Circle(radius=100)
As before, we can access the attributes of the instance of the class by using the dot notation:
# Print the object attribute radius
BlueCircle.radius
100
# Print the object attribute color
BlueCircle.color
'blue'
We can draw the object by using the method drawCircle():
# Call the method drawCircle
BlueCircle.drawCircle()
Compare the x and y axis of the figure to the figure for RedCircle; they are different.
Task-5. Additionally, you need to create two objects of the Vehicle class object that should have a max speed of 200kph and mileage of 50000kmpl with five seating capacities, and another car object should have a max speed of 180kph and 75000kmpl with four seating capacities.¶
#Type your code here
class car():
    """A vehicle with a maximum speed, a mileage and a seating capacity."""

    # Class attribute: shared by every car unless overridden on an instance.
    color='white'

    def __init__(self,maxspeed,miles):
        """Store the maximum speed and mileage; seating capacity starts unset."""
        self.maxspeed=maxspeed
        self.miles=miles
        # Must use the same attribute name as assign_seating_cap() and
        # display_properties(); otherwise displaying a car before a capacity
        # is assigned raises AttributeError.
        self.seating_cap=None

    def assign_seating_cap(self,seating_cap):
        """Set the number of seats of this car."""
        self.seating_cap=seating_cap

    def display_properties(self):
        """Print the colour, maximum speed, mileage and seating capacity."""
        print("Properties of the Vehicle:")
        print("Color:", self.color)
        print("Maximum Speed:", self.maxspeed)
        print("Mileage:", self.miles)
        print("Seating Capacity:", self.seating_cap)
car1=car(200,20)
car1.assign_seating_cap(5)
car1.display_properties()
Properties of the Vehicle: Color: white Maximum Speed: 200 Mileage: 20 Seating Capacity: 5
Module 3 Summary: Python Programming Fundamentals¶
Summary
Python conditions use “if” statements to execute code based on true/false conditions created by comparisons and Boolean expressions.
Comparison operations require using comparison operators such as equal to "==", greater than ">", and less than "<".
The inequality operator "!=" (an exclamation mark followed by an equals sign) tests whether two values are not equal.
You can compare integers, strings, and floats.
Python branching directs program flow by using conditional statements (for example, if, else, elif) to execute different code blocks based on conditions or tests.
You can use the "if" statement with conditions to define actions if true.
To perform actions based on true or false output, you can use the "else" statement with conditions.
The elif statement allows for additional checks only if the initial condition is false.
To execute various operations on Boolean values, we use Boolean logic operators.
Python loops are control structures that automate repetitive tasks and iterate over data structures like lists or dictionaries.
The range() function generates a sequence of numbers with a specified start, stop, and step value for loops in Python.
A for loop in Python iterates over a sequence, such as a list, tuple, or string, and executes a block of code for each item in the sequence.
A while loop in Python executes a block of code as long as a specified condition remains true.
Python functions are reusable code blocks that perform specific tasks, take input parameters, and often return results, enhancing code modularity and reusability.
You may or may not have written the codes that are often included in functions.
Python has a set of built-in functions such as "len" to find the length of a sequence or "sum" to find the total sum of a sequence.
The "sorted" function creates a new sorted list, while "sort" sorts items in the original list.
You can also create your own functions in Python.
To ensure clarity and organization and facilitate understanding and maintenance of the code, developers must document functions using a documentation string enclosed in three quotes.
The help command will return the documentation defined for a particular function.
A function can have multiple parameters.
“No return” statement in the function means that the function will return nothing.
The "No work" function does not execute any task. You can use the "pass" keyword to meet the requirement of a non-empty body.
A function will usually perform more than one task.
In Python, the scope of a variable determines where you can access or modify that variable. Global scope allows access from anywhere, while local scope restricts it to a block or function.
In Python, a programmer defines a local variable within a specific block or function, which can only be accessed or modified within that block or function.
In Python, a global variable is a variable defined at the top level of a program that any part of the code can access or modify.
Exception handling in Python is a mechanism for managing and responding to errors and exceptions that may occur during program execution, preventing them from crashing the program.
In Python, you use the "try-except" statement to attempt a block of code and specify alternative actions to execute if an error occurs, allowing you to handle exceptions.
In Python, you use the "try-except-else" statement to attempt a block of code, handle exceptions in the "except" block, and execute code in the "else" block when no exceptions occur.
Python developers use the "try-except-else-finally" statement to attempt a block of code, catch exceptions in the "except" block, execute code in the "else" block when no exceptions occur, and ensure that the "finally" block always runs, regardless of whether an exception raised or not.
In Python, objects are instances of classes that encapsulate data and behavior, serving as the foundation for creating and working with various data types and custom data structures.
To determine the type of an object in Python, you can use the type() command.
Any changes made within the method of the object may result in a change in object type.
Classes in Python are blueprints for creating objects, defining their attributes and methods, enabling code organization, and object-oriented programming.
The function "__init__" is a special method used to initialize data attributes.
We can create instances of a class in Python.
Data attributes consist of the data defining the objects.
Methods are functions that interact and change the data attributes.
The method has a function that requires the self as well as other parameters.
Module 4
Working In Python With Text Files
One way to read or write a file in Python is to use the built-in open function. The open function provides a File object that contains the methods and attributes you need in order to read, save, and manipulate the file. In this notebook, we will only cover .txt files. The first parameter you need is the file path and the file name. An example is shown as follow:

The mode argument is optional and the default value is r. In this notebook we only cover two modes:
- **r**: Read mode for reading files
- **w**: Write mode for writing files
For the next example, we will use the text file Example1.txt. The file is shown as follows:

We read the file:
# Read the Example1.txt
example1 = "C:/Users/Gamaliel/Documents/G/ADD/IBM_DS/Python4DS/IBMs-NBs/Mod4/example1.txt"
file1 = open(example1, "r")
We can view the attributes of the file.
The name of the file:
# Print the path of file
file1.name
'C:/Users/Gamaliel/Documents/G/ADD/IBM_DS/Python4DS/IBMs-NBs/Mod4/example1.txt'
The mode the file object is in:
# Print the mode of file, either 'r' or 'w'
file1.mode
'r'
We can read the file and assign it to a variable :
# Read the file
FileContent = file1.read()
FileContent
'This is line a\nThis is line c\n'
The \n means that there is a new line.
We can print the file:
# Print the file with '\n' as a new line
print(FileContent)
This is line a This is line c
The file is of type string:
# Type of file content
type(FileContent)
str
It is very important that the file is closed in the end. This frees up resources and ensures consistency across different python versions.
# Close file after finish
file1.close()
A Better Way to Open a File
Using the with statement is better practice, it automatically closes the file even if the code encounters an exception. The code will run everything in the indent block then close the file object.
# Open file using with
with open(example1, "r") as file1:
FileContent = file1.read()
print(FileContent)
This is line a This is line c
The file object is closed, you can verify it by running the following cell:
# Verify if the file is closed
file1.closed
True
We can see the info in the file:
# See the content of file
print(FileContent)
This is line a This is line c
The syntax is a little confusing as the file object is after the as statement. We also don’t explicitly close the file. Therefore we summarize the steps in a figure:

We don’t have to read the entire file; for example, we can read the first 4 characters by passing 4 as a parameter to the method .read():
# Read first four characters
with open(example1, "r") as file1:
print(file1.read(4))
This
Once the method .read(4) is called, the first 4 characters are read. If we call the method again, the next 4 characters are read. The output for the following cell will demonstrate the process for different inputs to the method read():
# Read certain amount of characters
with open(example1, "r") as file1:
print(file1.read(4))
print(file1.read(4))
print(file1.read(7))
print(file1.read(15))
This is line a This is line c
The process is illustrated in the below figure, and each color represents the part of the file read after the method read() is called:

We can also read one line of the file at a time using the method readline():
# Read one line
with open(example1, "r") as file1:
print("first line: " + file1.readline())
first line: This is line a
We can also pass an argument to readline() to specify the number of characters we want to read. However, unlike read(), readline() can only read one line at most.
with open(example1, "r") as file1:
print(file1.readline(20)) # does not read past the end of line
print(file1.read(20)) # Returns the next 20 chars
This is line a This is line c
We can use a loop to iterate through each line:
# Iterate through the lines
with open(example1, "r") as file1:
    # enumerate supplies the running line number, so no manual counter is needed.
    for i, line in enumerate(file1):
        print("Iteration", str(i), ": ", line)
Iteration 0 : This is line a Iteration 1 : This is line c
We can use the method readlines() to save the text file to a list:
# Read all lines and save as a list
with open(example1, "r") as file1:
FileasList = file1.readlines()
Each element of the list corresponds to a line of text:
Writing Files
We can open a file object and use the method write() to save text to a file. To write to a file, the mode argument must be set to w. Let’s write a file Example2.txt with the line: “This is line A”
# Write line to file
exmp2 = 'Example2.txt'
with open(exmp2, 'w') as writefile:
writefile.write("This is line A")
We can read the file to see if it worked:
# Read file
with open(exmp2, 'r') as testwritefile:
print(testwritefile.read())
This is line A
We can write multiple lines:
# Write lines to file
with open(exmp2, 'w') as writefile:
    # "\n" (backslash) is the newline escape; "/n" would be written
    # literally as a slash followed by the letter n.
    writefile.write("This is line A\n")
    writefile.write("This is line B\n")
The method .write() works similarly to the method .readline(), except instead of reading a new line it writes a new line. The process is illustrated in the figure. The different colour coding of the grid represents a new line added to the file after each method call.

You can check the file to see if your results are correct.
# Check whether write to file
with open(exmp2, 'r') as testwritefile:
print(testwritefile.read())
This is line A/nThis is line B/n
We write a list to a .txt file as follows:
# Sample list of text
# Each entry ends with the newline escape "\n" so that every string becomes
# its own line once written to a file.
Lines = ["This is line A\n", "This is line B\n", "This is line C\n"]
Lines
['This is line A/n', 'This is line B/n', 'This is line C/n']
# Write the strings in the list to text file
with open('Example2.txt', 'w') as writefile:
for line in Lines:
print(line)
writefile.write(line)
This is line A/n This is line B/n This is line C/n
We can verify the file is written by reading it and printing out the values:
# Verify if writing to file is successfully executed
with open('Example2.txt', 'r') as testwritefile:
print(testwritefile.read())
This is line A/nThis is line B/nThis is line C/n
However, note that setting the mode to w overwrites all the existing data in the file.
# Mode 'w' truncates the file first, so the previous content is replaced.
with open('Example2.txt', 'w') as writefile:
    writefile.write("Overwrite\n")  # "\n" is the newline escape
with open('Example2.txt', 'r') as testwritefile:
    print(testwritefile.read())
Overwrite/n
Appending Files
We can write to files without losing any of the existing data as follows by setting the mode argument to append: a. You can append a new line as follows:
# Write a new line to text file
# Mode 'a' adds to the end of the file and keeps the existing content.
with open('Example2.txt', 'a') as testwritefile:
    # "\n" (backslash) ends each line; "/n" would be written literally.
    testwritefile.write("This is line C\n")
    testwritefile.write("This is line D\n")
    testwritefile.write("This is line E\n")
You can verify the file has changed by running the following cell:
# Verify if the new line is in the text file
with open('Example2.txt', 'r') as testwritefile:
print(testwritefile.read())
Overwrite/nThis is line C/nThis is line D/nThis is line E/n
Additional modes
It's fairly inefficient to open the file in a or w and then reopen it in r to read any lines. Luckily we can access the file in the following modes:
- r+ : Reading and writing. Cannot truncate the file.
- w+ : Writing and reading. Truncates the file.
- a+ : Appending and Reading. Creates a new file, if none exists.
You don't have to dwell on the specifics of each mode for this lab.
Let's try out the a+ mode:
with open('Example2.txt', 'a+') as testwritefile:
    testwritefile.write("This is line E\n")  # "\n" is the newline escape
    # The position is at the end of the file after writing, so this read
    # returns an empty string.
    print(testwritefile.read())
    #testwritefile.seek(0,0) # move 0 bytes from beginning.
    #data = testwritefile.read()
    #print(data)
There were no errors but read() also did not output anything. This is because of our location in the file.
Most of the file methods we've looked at work in a certain location in the file. .write() writes at a certain location in the file. .read() reads at a certain location in the file and so on. You can think of this as moving your pointer around in the notepad to make changes at a specific location.
Opening the file in w is akin to opening the .txt file, moving your cursor to the beginning of the text file, writing new text and deleting everything that follows.
Whereas opening the file in a is similar to opening the .txt file, moving your cursor to the very end and then adding the new pieces of text.
It is often very useful to know where the 'cursor' is in a file and be able to control it. The following methods allow us to do precisely this -
- .tell() - returns the current position in bytes.
- .seek(offset, from) - changes the position by 'offset' bytes with respect to 'from'. 'from' can take the value 0, 1 or 2, corresponding to the beginning of the file, the current position, and the end of the file, respectively.
Now lets revisit a+
with open('Example2.txt', 'a+') as testwritefile:
    # In 'a+' mode the cursor starts at the end of the file, so the first
    # read() below is expected to return an empty string.
    print("Initial Location: {}".format(testwritefile.tell()))

    data = testwritefile.read()
    if not data:  # empty strings are falsy in Python
        print('Read nothing')
    else:
        # Print what was just read: a second read() here would return ''
        # because the cursor is already at the end of the file.
        print(data)

    testwritefile.seek(0, 0)  # move 0 bytes from the beginning of the file
    # NOTE(review): "/n" is printed literally as two characters; "\n" was
    # probably intended - confirm before changing the recorded output.
    print("/nNew Location : {}".format(testwritefile.tell()))

    data = testwritefile.read()
    if not data:
        print('Read nothing')
    else:
        print(data)

    print("Location after read: {}".format(testwritefile.tell()))
Initial Location: 75 Read nothing /nNew Location : 0 Overwrite/nThis is line C/nThis is line D/nThis is line E/nThis is line E/n Location after read: 75
Finally, a note on the difference between w+ and r+. Both of these modes allow access to read and write methods; however, opening a file in w+ overwrites it and deletes all pre-existing data.
lines = []
with open('Example2.txt', 'r+') as testwritefile:
    # 'r+' keeps the existing content; rewind so the writes below overwrite
    # the file starting from its first byte.
    testwritefile.seek(0, 0)
    for number in range(1, 5):
        entry = "Line {}".format(number) + "/n"
        lines.append(entry)
        testwritefile.write(entry)
    testwritefile.write("finished/n")
    # Rewind again and read back the whole file, including any old content
    # that extends past the newly written text.
    testwritefile.seek(0, 0)
    print(testwritefile.read())
Line 1/nLine 2/nLine 3/nLine 4/nfinished/nnThis is line E/nThis is line E/n
To work with a file on existing data, use r+ and a+. While using r+, it can be useful to add a .truncate() method at the end of your data. This will reduce the file to your data and delete everything that follows.
with open('Example2.txt', 'r+') as testwritefile:
    testwritefile.seek(0, 0)  # start writing at the beginning of the file
    for number in range(1, 5):
        testwritefile.write("Line {}".format(number) + "/n")
    testwritefile.write("finished/n")
    # Cut the file off at the current position so that old content left
    # after the new text is removed.
    testwritefile.truncate()
    testwritefile.seek(0, 0)
    print(testwritefile.read())
Line 1/nLine 2/nLine 3/nLine 4/nfinished/n
Copy a File
Let's copy the file Example2.txt to the file Example3.txt:
# Copy the content of Example2.txt into Example3.txt, line by line
with open('Example2.txt', 'r') as readfile, open('Example3.txt', 'w') as writefile:
    # writelines consumes the source file's line iterator and writes each
    # line unchanged to the destination.
    writefile.writelines(readfile)
We can read the file to see if everything works:
# Verify if the copy is successfully executed
with open('Example3.txt','r') as testwritefile:
print(testwritefile.read())
Line 1/nLine 2/nLine 3/nLine 4/nfinished/n
After reading files, we can also write data into files and save them in different file formats like .txt, .csv, .xls (for excel files) etc. You will come across these in further examples
NOTE: If you wish to open and view the example3.txt file, download this lab here and run it locally on your machine. Then go to the working directory to ensure the example3.txt file exists and contains the summary data that we wrote.
# Notes end Here
Reading and writing with Pandas
Pandas: DataFrame and Series
Pandas is a popular library for data analysis built on top of the Python programming language. Pandas generally provide two data structures for manipulating data, They are:
- DataFrame
- Series
A DataFrame is a two-dimensional data structure, i.e., data is aligned in a tabular fashion in rows and columns.
- A Pandas DataFrame will be created by loading the datasets from existing storage.
- Storage can be SQL Database, CSV file, Excel file, etc.
- It can also be created from the lists, dictionaries, and from a list of dictionaries.
Series represents a one-dimensional array of indexed data. It has two main components :
- An array of actual data.
- An associated array of indexes or data labels.
The index is used to access individual data values. You can also get a column of a dataframe as a Series. You can think of a Pandas series as a 1-D dataframe.
# let us import the Pandas Library
import pandas as pd
Once you’ve imported pandas, you can then use the functions built in it to create and analyze data.
In this practice lab, we will learn how to create a DataFrame out of a dictionary.
Let us consider a dictionary 'x' with keys and values as shown below.
We then create a dataframe from the dictionary using the function pd.DataFrame(dict)
# Define a dictionary 'x': each key is a column label and each list holds
# the values of that column.
x = {
    'Name': ['Rose', 'John', 'Jane', 'Mary'],
    'ID': [1, 2, 3, 4],
    'Department': ['Architect Group', 'Software Group', 'Design Team', 'Infrastructure'],
    'Salary': [100000, 80000, 50000, 60000],
}

# Cast the dictionary to a DataFrame
df = pd.DataFrame(data=x)

# Display the resulting DataFrame
df
| Name | ID | Department | Salary | |
|---|---|---|---|---|
| 0 | Rose | 1 | Architect Group | 100000 |
| 1 | John | 2 | Software Group | 80000 |
| 2 | Jane | 3 | Design Team | 50000 |
| 3 | Mary | 4 | Infrastructure | 60000 |
We can see the direct correspondence between the dictionary and the table. The keys correspond to the column labels, and the values (lists) correspond to the entries of each column, one entry per row.
Column Selection:¶
To select a column in a Pandas DataFrame, we can access it by its column name.
Let's Retrieve the data present in the ID column.
# Retrieve the "ID" column and assign it to a variable x.
# The type of the result depends on the number of brackets:
# double brackets (a list of column names) return a DataFrame,
# single brackets (one column name) return a Series.
x = df[['ID']]
x
y=df['Name']
y
0 Rose 1 John 2 Jane 3 Mary Name: Name, dtype: object
Let's use the type() function and check the type of the variable.
#check the type of x
print(type(x))
print(type(y))
<class 'pandas.core.frame.DataFrame'> <class 'pandas.core.series.Series'>
The output shows us that x (selected with double brackets) is a DataFrame object, while y (selected with single brackets) is a Series object.
Access to multiple columns¶
Let us retrieve the data for Department, Salary and ID columns
#Retrieving the Department, Salary and ID columns and assigning it to a variable z
z = df[['Department','Salary','ID']]
z
| Department | Salary | ID | |
|---|---|---|---|
| 0 | Architect Group | 100000 | 1 |
| 1 | Software Group | 80000 | 2 |
| 2 | Design Team | 50000 | 3 |
| 3 | Infrastructure | 60000 | 4 |
loc() and iloc()
loc() is a label-based data selecting method which means that we have to pass the name of the row or column that we want to select. This method includes the last element of the range passed in it.
Simple syntax for your understanding:
- loc[row_label, column_label]
iloc() is an indexed-based selecting method which means that we have to pass an integer index in the method to select a specific row/column. This method does not include the last element of the range passed in it.
Simple syntax for your understanding:
- iloc[row_index (range), column_index (range)]
# Access the value on the first row and the first column
df.iloc[0, 0]
'Rose'
# Access the first three rows and the first two columns
# (iloc excludes the stop position, so 0:3 selects positions 0, 1 and 2)
df.iloc[0:3,0:2]
| Name | ID | |
|---|---|---|
| 0 | Rose | 1 |
| 1 | John | 2 |
| 2 | Jane | 3 |
# Access the column using the name
df.loc[0, 'Salary']
100000
Slicing dataframes¶
Slicing uses the [] operator to select a set of rows and/or columns from a DataFrame.
To slice out a set of rows, you use this syntax: data[start:stop],
here the start represents the index from where to consider, and stop represents the index one step BEYOND the row you want to select. You can perform slicing using both the index and the name of the column.
NOTE: When slicing in pandas, the start bound is included in the output.
So if you want to select rows 0, 1, and 2 your code would look like this: df.iloc[0:3].
It means you are telling Python to start at index 0 and select rows 0, 1, 2 up to but not including 3.
NOTE: Labels must be found in the DataFrame or you will get a KeyError.
Indexing by labels(i.e. using loc()) differs from indexing by integers (i.e. using iloc()). With loc(), both the start bound and the stop bound are inclusive. When using loc(), integers can be used, but the integers refer to the index label and not the position.
For example, using loc() and select 1:4 will get a different result than using iloc() to select rows 1:4.
We can also select a specific data value using a row and column location within the DataFrame and iloc indexing.
#let us do the slicing using loc() function on old dataframe df where index column is having labels as 0,1,2
df.loc[0:2,'Name':'Department']
| Name | ID | Department | |
|---|---|---|---|
| 0 | Rose | 1 | Architect Group |
| 1 | John | 2 | Software Group |
| 2 | Jane | 3 | Design Team |
# Slice with loc() on a new DataFrame df2 whose index is the Name column
# (labels such as Rose, John and Jane); set_index returns a new DataFrame,
# so df itself is left unchanged.
df2 = df.set_index("Name")
df2.loc['Rose':'Jane', 'ID':'Department']
| ID | Department | |
|---|---|---|
| Name | ||
| Rose | 1 | Architect Group |
| John | 2 | Software Group |
| Jane | 3 | Design Team |
Numpy¶
#let us do the slicing using loc() function on old dataframe df where index column is having labels as 0,1,2
df.loc[0:2,'Name':'Department']
| Name | ID | Department | |
|---|---|---|---|
| 0 | Rose | 1 | Architect Group |
| 1 | John | 2 | Software Group |
| 2 | Jane | 3 | Design Team |
# Slice with loc() on a new DataFrame df2 whose index is the Name column
# (labels such as Rose, John and Jane); set_index returns a new DataFrame,
# so df itself is left unchanged.
df2 = df.set_index("Name")
df2.loc['Rose':'Jane', 'ID':'Department']
| ID | Department | |
|---|---|---|
| Name | ||
| Rose | 1 | Architect Group |
| John | 2 | Software Group |
| Jane | 3 | Design Team |
NumPy is a Python library used for working with arrays, linear algebra, fourier transform, and matrices. NumPy stands for Numerical Python and it is an open source project. The array object in NumPy is called ndarray, it provides a lot of supporting functions that make working with ndarray very easy.
Arrays are very frequently used in data science, where speed and resources are very important.
NumPy is usually imported under the np alias.
It's usually fixed in size and each element is of the same type. We can cast a list to a numpy array by first importing numpy:
# import numpy library
import numpy as np
Assign value¶
We then cast the list as follows:
# Create a numpy array
a = np.array([0, 1, 2, 3, 4])
a
array([0, 1, 2, 3, 4])
# Assign the first element to 100
c=a
c[0] = 100
c
array([100, 1, 2, 3, 4])
Each element is of the same type, in this case integers:

print(np.__version__)
# Check the type of the array
print(type(a))
# Check the type of the values stored in numpy array
a.dtype
1.26.4 <class 'numpy.ndarray'>
dtype('int32')
a = np.array([10, 2, 30, 40,50])
# Enter your code here
a[1]=20
Slicing¶
Like lists, we can slice the numpy array. Slicing in python means taking the elements from the given index to another given index.
We pass slice like this: [start:end].The element at end index is not being included in the output.
We can select the elements from index 1 to 3 with c[1:4]. We can also assign new values to a slice; for example, we set the elements at indexes 3 and 4 as follows:
c[3:5] = 300, 400
c
array([100, 1, 2, 300, 400])
We can also define the steps in slicing, like this: [start:end:step].
arr = np.array([1, 2, 3, 4, 5, 6, 7])
print(arr[1:5:2])
[2 4]
# If we don't pass start its considered 0
print(arr[:4])
[1 2 3 4]
#If we don't pass end it considers till the length of array.
print(arr[4:])
[5 6 7]
# If we don't pass step its considered 1
print(arr[1:5:])
[2 3 4 5]
Assign Value with List¶
# Create the index list
select = [0, 2, 3, 4]
select
d = c[select]
d
#Assign the specified elements to new value
c[select] = 100000
c
array([100000, 1, 100000, 100000, 100000])
print([a.size, a.ndim])
[5, 1]
Numpy Statistical Functions¶
# Get the mean of numpy array
mean = a.mean()
mean
30.0
# Get the standard deviation of numpy array
standard_deviation=a.std()
standard_deviation
14.142135623730951
max_a = a.max()
max_a
min_a = a.min()
print([min_a,max_a])
[10, 50]
Numpy Array Operations¶
You could use arithmetic operators directly between NumPy arrays
Addition¶
u = np.array([1, 0])
u
v = np.array([0, 1])
v
z = np.add(u, v) # == z=u+v
z
print([u,v,z])
[array([1, 0]), array([0, 1]), array([1, 1])]
# Plotting addition
import time
import sys
import numpy as np
import matplotlib.pyplot as plt
def Plotvec1(u, z, v):
    """Plot the 2-D vectors u, v and z as arrows starting at the origin.

    Parameters
    ----------
    u, z, v : array-like with two components
        x and y components of each vector. Each one is unpacked into
        ``ax.arrow`` and shifted with ``+ 0.1`` to position its label, so it
        must support scalar addition (for example a NumPy array).

    u is drawn in red, v in blue and z in the default arrow color; each
    arrow is labelled with its name and both axes are limited to [-2, 2].
    """
    plt.figure()  # start a new figure for this plot
    ax = plt.axes()  # generate the full-window axes
    # Red arrow for u with head width 0.05 and head length 0.1
    ax.arrow(0, 0, *u, head_width=0.05, color='r', head_length=0.1)
    plt.text(*(u + 0.1), 'u')  # add the label 'u' just past the arrow tip
    # Blue arrow for v with head width 0.05 and head length 0.1
    ax.arrow(0, 0, *v, head_width=0.05, color='b', head_length=0.1)
    plt.text(*(v + 0.1), 'v')  # add the label 'v' just past the arrow tip
    # Arrow for z in the default color
    ax.arrow(0, 0, *z, head_width=0.05, head_length=0.1)
    plt.text(*(z + 0.1), 'z')  # add the label 'z' just past the arrow tip
    plt.ylim(-2, 2)  # set the ylim to bottom(-2), top(2)
    plt.xlim(-2, 2)  # set the xlim to left(-2), right(2)
    # show() takes only the keyword argument 'block'; it displays all open
    # figures, so no figure handle is passed.
    plt.show()
Plotvec1(u, z, v)
Multiplication¶
arr1 = np.array([10, 11, 12, 13, 14, 15])
arr2 = np.array([20, 21, 22, 23, 24, 25])
print(c)
z = np.multiply(arr1, arr2)
print(z)
z = arr1*arr2
print(z)
z
[100000 1 100000 100000 100000] [200 231 264 299 336 375] [200 231 264 299 336 375]
array([200, 231, 264, 299, 336, 375])
Division¶
a = arr1
a
b = arr2
b
c = np.divide(arr1, arr2)
print(c)
c = a/b
print(c)
[0.5 0.52380952 0.54545455 0.56521739 0.58333333 0.6 ] [0.5 0.52380952 0.54545455 0.56521739 0.58333333 0.6 ]
Dot product¶
np.dot(arr1,arr2)
1705
# Import the libraries
import time
import sys
import numpy as np
import matplotlib.pyplot as plt
def Plotvec2(a, b):
    """Plot the 2-D vectors a and b as arrows starting at the origin.

    Parameters
    ----------
    a, b : array-like with two components
        x and y components of each vector. Each one is unpacked into
        ``ax.arrow`` and shifted with ``+ 0.1`` to position its label, so it
        must support scalar addition (for example a NumPy array).

    a is drawn in red and b in blue; each arrow is labelled with its name
    and both axes are limited to [-2, 2].
    """
    plt.figure()  # start a new figure for this plot
    ax = plt.axes()  # generate the full-window axes
    # Red arrow for a with head width 0.05 and head length 0.1
    ax.arrow(0, 0, *a, head_width=0.05, color='r', head_length=0.1)
    plt.text(*(a + 0.1), 'a')
    # Blue arrow for b with head width 0.05 and head length 0.1
    ax.arrow(0, 0, *b, head_width=0.05, color='b', head_length=0.1)
    plt.text(*(b + 0.1), 'b')
    plt.ylim(-2, 2)  # set the ylim to bottom(-2), top(2)
    plt.xlim(-2, 2)  # set the xlim to left(-2), right(2)
    # show() takes only the keyword argument 'block'; it displays all open
    # figures, so no figure handle is passed.
    plt.show()
# Write your code below and press Shift+Enter to execute
a=np.array([-1, 1])
b=np.array([1, 1])
Plotvec2(a,b)
dab=np.dot(a,b)
print('a-b dot product is: ',dab)
a-b dot product is: 0
Adding Constant to a Numpy Array¶
Consider the following array:
# Create a constant to numpy array
u = np.array([1, 2, 3, -1])
u
y=u + 1
print(u,y)
[ 1 2 3 -1] [2 3 4 0]
Mathematical Functions¶
We can access the value of pi in numpy as follows :
# The value of pi
np.pi
# Create the numpy array in radians
x = np.array([0, np.pi/2 , np.pi])
# Calculate the sin of each elements
y = np.sin(x)
y
array([0.0000000e+00, 1.0000000e+00, 1.2246468e-16])
Linspace¶
A useful function for plotting mathematical functions is linspace. Linspace returns evenly spaced numbers over a specified interval.
numpy.linspace(start, stop, num = int value)
start : start of interval range
stop : end of interval range
num : Number of samples to generate.
# Make a numpy array within [-2, 2] with 5 elements
np.linspace(-2, 2, num=5)

# Make a numpy array within [0, 2π] with 100 elements
x = np.linspace(0, 2*np.pi, num=100)

# Calculate the sine of each element of x
y = np.sin(x)

# Plot the result on a new figure (plt.figure must be called, not just named)
fg = plt.figure()
plt.plot(x, y)
plt.show()
Iterating 1-D Arrays¶
Iterating means going through elements one by one.
If we iterate on a 1-D array it will go through each element one by one.
If we execute the numpy array, we get in the array format
arr1 = np.array([1, 2, 3])
print(arr1)
# But if you want to result in the form of the list, then you can use for loop:
for x in arr1:
print(x)
[1 2 3] 1 2 3
Matrix Mathematics¶
You have seen that you can use Numpy package functions to perform different types of operations on arrays and matrices. In this reading, you will learn how these operations work mathematically. 1D Arrays : Vectors
A 1D array is often termed as a vector. Depending upon the orientation of the data, the vector can be classified as a row vector or a column vector. This is illustrated in the image below.
1D Arrays : Vectors¶
A 1D array is often termed as a vector. Depending upon the orientation of the data, the vector can be classified as a row vector or a column vector. This is illustrated in the image below.
import numpy as np
Imgname='maths_1.png'
imsdir="C:/Users/Gamaliel/Documents/G/ADD/IBM_DS/Python4DS/IBMs-NBs/Mod4/04.Matrices Archivos/"
def loadIm(imsdir, Imgname):
    """Display an image file inline in the notebook.

    Parameters
    ----------
    imsdir : str
        Directory that contains the image. A trailing path separator is
        optional.
    Imgname : str
        File name of the image inside ``imsdir``.
    """
    import os
    from IPython.display import Image, display

    # os.path.join adds a separator only when imsdir does not already end
    # with one, so both 'some/dir/' and 'some/dir' work.
    display(Image(filename=os.path.join(imsdir, Imgname)))
loadIm(imsdir,Imgname)
Properties¶
# Two 3x3 nested lists that are converted into 2-D NumPy arrays
a = [[11, 12, 13], [21, 22, 23], [31, 32, 33]]
b = [[1, 1, 1], [1, 1, 1], [1, 1, 1]]
A, B = np.array(a), np.array(b)
Adim = A.ndim

# Report the number of axes, the length of each axis and the element count
for label, value in (("Dimensions", A.ndim), ("Shape", A.shape), ("Size", A.size)):
    print("A's {}: ".format(label), value)
A's Dimensions: 2 A's Shape: (3, 3) A's Size: 9
Mathematically, we can add, subtract, and take the product of two vectors, provided they are the same shape. The images below highlight the mathematical operations conducted on a pair of vectors.
Imgname='maths_2.png'
loadIm(imsdir,Imgname)
Addition and subtraction¶
print('Addition: ')
print(A+B)
print('Subtraction: ')
print(A-B)
Addition: [[12 13 14] [22 23 24] [32 33 34]] Subtraction: [[10 11 12] [20 21 22] [30 31 32]]
1D Multiplication¶
Imgname='maths_3.png'
loadIm(imsdir,Imgname)
print('Multiplication: ')
print(A*B)
Multiplication: [[11 12 13] [21 22 23] [31 32 33]]
Scalar addition¶
All three of these operations are conducted on corresponding elements of individual vectors. The resulting array always has the same size as that of the two original vectors.
To a single vector, we can also add a constant (scalar addition), subtract a constant (scalar subtraction) and multiply a constant (scalar multiplication) to any vector. The images below illustrate these operations.
Imgname='maths_4.png'
loadIm(imsdir,Imgname)
Imgname='maths_5.png'
loadIm(imsdir,Imgname)
print('Scalar addition: ')
print(A+1)
print('Scalar subtraction: ')
print(A-1)
print('Scalar multiplication: ')
print(A*2)
Scalar addition: [[12 13 14] [22 23 24] [32 33 34]] Scalar subtraction: [[10 11 12] [20 21 22] [30 31 32]] Scalar multiplication: [[22 24 26] [42 44 46] [62 64 66]]
2D Arrays : Matrices¶
A 2D array is also called a Matrix. These are typically rectangular arrays with data stored in different rows. All of the operations mentioned above are also applicable to the 2D arrays. However, the Dot product of 2D matrices follows a different rule.
As illustrated in the images below, the dot product is carried out by multiplying and adding corresponding elements of rows of the first matrix with the elements of columns of the second matrix. As a result, the output matrix from the multiplication will have a modified shape.
The general rule is that the dot product of an m X n matrix can be done only with an n X p matrix, and the resultant matrix will have the shape m X p. In the example shown below, the 4 X 2 matrix is multiplied with the 2 X 4 matrix to generate a 4 X 4 matrix.
Accessing different elements of a Numpy Array¶
We can use rectangular brackets to access the different elements of the array. The correspondence between the rectangular brackets and the list and the rectangular representation is shown in the following figure for a 3x3 array:

We can access the 2nd-row, 3rd column as shown in the following figure:

We simply use the square brackets and the indices corresponding to the element we would like:
# Access the element on the second row and third column
A[1, 2]
23
We can also use the following notation to obtain the elements:
# Access the element on the second row and third column
A[1][2]
23
Consider the elements shown in the following figure

We can access the element as follows:
# Access the element on the first row and first column
A[0][0]
11
We can also use slicing in numpy arrays. Consider the following figure. We would like to obtain the first two columns in the first row

This can be done with the following syntax:
# Access the element on the first row and first and second columns
A[0][0:2]
array([11, 12])
Similarly, we can obtain the first two rows of the 3rd column as follows:
# Access the element on the first and second rows and third column
A[0:2, 2]
array([13, 23])
Corresponding to the following figure:

Basic Operations¶
We can also add arrays. The process is identical to matrix addition. Matrix addition of X and Y is shown in the following figure:

The numpy array is given by X and Y
# Create a numpy array X
X = np.array([[1, 0], [0, 1]])
X
array([[1, 0],
[0, 1]])
# Create a numpy array Y
Y = np.array([[2, 1], [1, 2]])
Y
array([[2, 1],
[1, 2]])
We can add the numpy arrays as follows.
# Add X and Y
Z = X + Y
Z
array([[3, 1],
[1, 3]])
Multiplying a numpy array by a scalar is identical to multiplying a matrix by a scalar. If we multiply the matrix Y by the scalar 2, we simply multiply every element in the matrix by 2, as shown in the figure.

We can perform the same operation in numpy as follows
# Create a numpy array Y
Y = np.array([[2, 1], [1, 2]])
Y
array([[2, 1],
[1, 2]])
# Multiply Y with 2
Z = 2 * Y
Z
array([[4, 2],
[2, 4]])
Multiplication of two arrays corresponds to an element-wise product or Hadamard product. Consider matrix X and Y. The Hadamard product corresponds to multiplying each of the elements in the same position, i.e. multiplying elements contained in the same color boxes together. The result is a new matrix that is the same size as matrix Y or X, as shown in the following figure.

We can perform element-wise product of the array X and Y as follows:
# Create a numpy array Y
Y = np.array([[2, 1], [1, 2]])
Y
array([[2, 1],
[1, 2]])
# Create a numpy array X
X = np.array([[1, 0], [0, 1]])
X
array([[1, 0],
[0, 1]])
# Multiply X with Y
Z = X * Y
Z
array([[2, 0],
[0, 2]])
We can also perform matrix multiplication with the numpy arrays A and B as follows:
Imgname='maths_6.png'
loadIm(imsdir,Imgname)
In the reverse example, when 2 X 4 matrix is multiplied with the 4 X 2 one, the resultant will be a 2 X 2 matrix.
Imgname='maths_7.png'
loadIm(imsdir,Imgname)
First, we define matrix A and B:
# Create a matrix A
A = np.array([[0, 1, 1], [1, 0, 1]])
A
array([[0, 1, 1],
[1, 0, 1]])
# Create a matrix B
B = np.array([[1, 1], [1, 1], [-1, 1]])
B
array([[ 1, 1],
[ 1, 1],
[-1, 1]])
We use the numpy function dot to multiply the arrays together.
# Calculate the dot product
Z = np.dot(A,B)
Z
array([[0, 2],
[0, 2]])
# Calculate the sine of Z
np.sin(Z)
array([[0. , 0.90929743],
[0. , 0.90929743]])
We use the numpy attribute T to calculate the transposed matrix
# Create a matrix C
C = np.array([[1,1],[2,2],[3,3]])
C
array([[1, 1],
[2, 2],
[3, 3]])
# Get the transposed of C
C.T
array([[1, 2, 3],
[1, 2, 3]])
Module 4 Summary: Working with Data in Python¶
Python uses the open() function and allows you to read and write files, providing access to the content within the file for reading. It also allows overwriting it for writing and specifies the file mode (for example, r for reading, w for writing, a for appending).
- To read a file, Python uses an open function along with r.
- Python uses the open with function to read and process a file attribute, that is, from open to close.
- In Python, you use the open method to edit or overwrite a file.
- To write a file, Python uses the open function along with w.
- In Python, "a" indicates that the program has appended to the file.
- In Python, “\n” signifies that the code should start on a new line.
- Python uses various methods to print lines from attributes.
Pandas is a powerful Python library for data manipulation and analysis, providing data structures and functions to work with structured data like data frames and series.
- You import the library (pandas) by using the import command followed by the library name.
- In Python, you use the as keyword to provide a shorter name (an alias) for the library.
- In Pandas, you use a data frame (df) to specify the files to read.
- DataFrames consist of rows and columns.
- You can create new DataFrames by using the column or columns of a specific DataFrame.
- We can work with data in a DataFrames and save the results in different formats.
- In Python, you use the Unique method to determine unique elements in a column of the DataFrames.
- You use the inequality operator along with df to assign a Boolean value to the selected column in DataFrames.
- You save a new DataFrame as a different DataFrame, which may contain values from an earlier DataFrame.
NumPy is a Python library for numerical and matrix operations, offering multidimensional array objects and a variety of mathematical functions to work with data efficiently.
- NumPy is a basis for Pandas.
- A NumPy array or ND array is similar to a list, usually of a fixed size with the same kind of element.
- A one-dimensional NumPy array is a linear sequence of elements with a single axis, like a traditional list, but optimized for numerical computations and array operations.
- You can access elements in a NumPy using an index.
- You use the attribute dtype to get the data type of the array elements.
- You use the attributes size and ndim to get the size and dimension of the array, respectively.
- You can use indexing and slicing methods in NumPy.
- Vector additions are widely used operations in Python.
- Representing vector addition with line segments or arrows is useful.
- NumPy codes work much faster, which is helpful with lots of data.
- You perform vector subtraction by replacing the addition sign with a negative sign.
Multiplying an array by a scalar in Python entails multiplying each element of the array by the scalar value, leading to a new array in which each element scales by the scalar.
- Hadamard product refers to the element-wise multiplication of two arrays of the same shape, resulting in a new array where each element is the product of the corresponding elements in the input arrays.
- The dot product in Python is the sum of the element-wise products of two arrays, often used for vector and matrix operations to find the scalar result of multiplying corresponding elements and summing them.
- When working with NumPy, it is common to utilize libraries like Matplotlib to create graphs and visualizations from numerical data stored in NumPy arrays.
- A two-dimensional NumPy array is a grid-like structure with rows and columns suitable for representing data as a matrix or a table for numerical computations.
- In NumPy, "shape" refers to an array's dimensions (number of rows and columns), indicating its size and structure.
- You use the attribute "size" to obtain the size of an array.
- You use square brackets to access the various elements in an array.
- You use a scalar to multiply elements in NumPy.
Module 5¶
API¶
An API lets two pieces of software talk to each other. Just like a function, you don't have to know how the API works, only its inputs and outputs. An essential type of API is a REST API that allows you to access resources via the internet. In this lab, we will review the Pandas Library in the context of an API, we will also review a basic REST API.

For the following data frame:
# Creates a data frame
dict_={'a':[11,21,31],'b':[12,22,32]}
df=pd.DataFrame(dict_)
df
| a | b | |
|---|---|---|
| 0 | 11 | 12 |
| 1 | 21 | 22 |
| 2 | 31 | 32 |
When you call a method such as head(), the dataframe communicates with the API, displaying the first few rows of the dataframe.
df.head()
df.mean()
a 21.0 b 22.0 dtype: float64
REST APIs¶
Rest APIs function by sending a request, the request is communicated via HTTP message. The HTTP message usually contains a JSON file. This contains instructions for what operation we would like the service or resource to perform. In a similar manner, API returns a response, via an HTTP message, this response is usually contained within a JSON.
In this lab, we will use the NBA API to determine how well the Golden State Warriors performed against the Toronto Raptors. We will use the API to determine the number of points the Golden State Warriors won or lost by for each game. So if the value is three, the Golden State Warriors won by three points. Similarly, if the Golden State Warriors lost by two points, the result will be negative two. The API will handle a lot of the details, such as Endpoints and Authentication.
import requests
import pandas as pd
import matplotlib.pyplot as plt
url = "https://s3-api.us-geo.objectstorage.softlayer.net/cf-courses-data/CognitiveClass/PY0101EN/Chapter%205/Labs/Golden_State.pkl"
def download(url, filename, timeout=30):
    """Download ``url`` and save the response body to ``filename``.

    Parameters
    ----------
    url : str
        Address of the resource to fetch with an HTTP GET request.
    filename : str
        Path of the local file to write; it is opened in binary write mode.
    timeout : float, optional
        Seconds to wait for the server before giving up (default 30).
        Without a timeout the request could wait indefinitely.

    Raises
    ------
    requests.exceptions.HTTPError
        If the server answers with a 4xx or 5xx status code.
    requests.exceptions.RequestException
        If the request itself fails (for example on a timeout).
    """
    response = requests.get(url, timeout=timeout)
    # Report HTTP errors here instead of returning silently, so a failed
    # download is not mistaken for a successful one by the caller.
    response.raise_for_status()
    if response.status_code == 200:
        with open(filename, "wb") as f:
            f.write(response.content)
# Fetch the pickled data set and store it next to the notebook
download(url, "Golden_State.pkl")
file_name = "Golden_State.pkl"
# NOTE(security): read_pickle unpickles the downloaded file, and unpickling
# can execute arbitrary code - only load pickle files from a trusted source.
games = pd.read_pickle(file_name)
# Split the games by the MATCHUP text: 'GSW vs. TOR' rows are kept as home
# games and 'GSW @ TOR' rows as away games.
games_home=games[games['MATCHUP']=='GSW vs. TOR']
games_away=games[games['MATCHUP']=='GSW @ TOR']
# Draw both PLUS_MINUS series against GAME_DATE on the same axes
fig, ax = plt.subplots()
games_away.plot(x='GAME_DATE',y='PLUS_MINUS', ax=ax)
games_home.plot(x='GAME_DATE',y='PLUS_MINUS', ax=ax)
# The legend labels follow the plotting order above: away first, then home
ax.legend(["away", "home"])
plt.show()
Overview of HTTP¶
When you, the client, use a web page your browser sends an HTTP request to the server where the page is hosted. The server tries to find the desired resource by default "index.html". If your request is successful, the server will send the object to the client in an HTTP response. This includes information like the type of the resource, the length of the resource, and other information.
The figure below represents the process. The circle on the left represents the client, the circle on the right represents the Web server. The table under the Web server represents a list of resources stored in the web server. In this case an HTML file, png image, and txt file .
The HTTP protocol allows you to send and receive information through the web including webpages, images, and other web resources. In this lab, we will provide an overview of the Requests library for interacting with the HTTP protocol.
Uniform Resource Locator:URL¶
Uniform resource locator (URL) is the most popular way to find resources on the web. We can break the URL into three parts.
- Scheme: This is the protocol; for this lab it will always be http://
- Internet address or Base URL: This will be used to find the location; here are some examples: www.ibm.com and www.gitlab.com
- Route: Location on the web server, for example: /images/IDSNlogo.png
You may also hear the term Uniform Resource Identifier (URI), URL are actually a subset of URIs. Another popular term is endpoint, this is the URL of an operation provided by a Web server.
Request¶
The process can be broken into the Request and Response process. The request using the get method is partially illustrated below. In the start line we have the GET method, this is an HTTP method. Also the location of the resource /index.html and the HTTP version. The Request header passes additional information with an HTTP request:
When an HTTP request is made, an HTTP method is sent, this tells the server what action to perform. A list of several HTTP methods is shown below. We will go over more examples later.
Response¶
The figure below represents the response; the response start line contains the version number HTTP/1.0, a status code (200) meaning success, followed by a descriptive phrase (OK). The response header contains useful information. Finally, we have the response body containing the requested file, an HTML document. It should be noted that some requests have headers.
Some status code examples are shown in the table below, the prefix indicates the class. These are shown in yellow, with actual status codes shown in white. Check out the following link for more descriptions.
Web scraping¶
The most straightforward way to do web scraping is to:
- Usual path
- Use beautiful soup
- Parse the HTML file with beautifulsoup
- Working with Pandas DataFrame()
- Use beautiful soup
- Alternative option
- Using Pandas (beautifulsoup is the usual way to do so)
- Working with Pandas DataFrame()
- Downloading (as from kaggle):
- .csv
- .json
- .xml
- Working with Pandas DataFrame()
- Using Pandas (beautifulsoup is the usual way to do so)
Random user¶
from randomuser import RandomUser
import pandas as pd
from IPython.display import Image
First, we will create a random user object, r.
r = RandomUser()
Then, using the generate_users() function, we get a list of 10 random users.
some_list = r.generate_users(10)
def get_users(nusers):
    """Return a pandas DataFrame with one row per randomly generated user.

    ``nusers`` is passed straight to ``RandomUser.generate_users``. Each row
    stores the values returned by the user's accessor methods under the
    columns Name, Gender, City, State, Email, DOB and Picture.
    """
    records = [
        {
            "Name": person.get_full_name(),
            "Gender": person.get_gender(),
            "City": person.get_city(),
            "State": person.get_state(),
            "Email": person.get_email(),
            "DOB": person.get_dob(),
            "Picture": person.get_picture(),
        }
        for person in RandomUser.generate_users(nusers)
    ]
    return pd.DataFrame(records)
Users_table=get_users(4)
Users_table
| Name | Gender | City | State | DOB | Picture | ||
|---|---|---|---|---|---|---|---|
| 0 | Edwin Dubois | male | Gempen | Nidwalden | edwin.dubois@example.com | 1991-12-03T18:03:21.351Z | https://randomuser.me/api/portraits/men/0.jpg |
| 1 | Jon Richardson | male | Peterborough | Dorset | jon.richardson@example.com | 1993-09-03T15:56:13.111Z | https://randomuser.me/api/portraits/men/77.jpg |
| 2 | John Taylor | male | Christchurch | Tasman | john.taylor@example.com | 1955-10-20T13:29:46.224Z | https://randomuser.me/api/portraits/men/91.jpg |
| 3 | Paula Sanz | female | Vigo | Castilla la Mancha | paula.sanz@example.com | 1983-10-31T21:43:37.106Z | https://randomuser.me/api/portraits/women/15.jpg |
RanUs=Users_table.iloc[0]
RanUs
Name Edwin Dubois Gender male City Gempen State Nidwalden Email edwin.dubois@example.com DOB 1991-12-03T18:03:21.351Z Picture https://randomuser.me/api/portraits/men/0.jpg Name: 0, dtype: object
RanUs['Name']
'Edwin Dubois'
Fruityvice API¶
import requests
import json
We will obtain the fruityvice API data using requests.get("url") function. The data is in a json format.
# Fetch the archived Fruityvice "all fruits" endpoint.
# The timeout keeps the cell from hanging indefinitely if the server never
# answers; raise_for_status() reports an HTTP error at the request itself
# instead of letting it show up later as a JSON decoding failure.
data = requests.get(
    "https://web.archive.org/web/20240929211114/https://fruityvice.com/api/fruit/all",
    timeout=30,
)
data.raise_for_status()
We will retrieve results using json.loads() function.
results = json.loads(data.text)
We will convert our json data into pandas data frame.
pd.DataFrame(results).head()
| name | id | family | order | genus | nutritions | |
|---|---|---|---|---|---|---|
| 0 | Persimmon | 52 | Ebenaceae | Rosales | Diospyros | {'calories': 81, 'fat': 0.0, 'sugar': 18.0, 'c... |
| 1 | Strawberry | 3 | Rosaceae | Rosales | Fragaria | {'calories': 29, 'fat': 0.4, 'sugar': 5.4, 'ca... |
| 2 | Banana | 1 | Musaceae | Zingiberales | Musa | {'calories': 96, 'fat': 0.2, 'sugar': 17.2, 'c... |
| 3 | Tomato | 5 | Solanaceae | Solanales | Solanum | {'calories': 74, 'fat': 0.2, 'sugar': 2.6, 'ca... |
| 4 | Pear | 4 | Rosaceae | Rosales | Pyrus | {'calories': 57, 'fat': 0.1, 'sugar': 10.0, 'c... |
The result is in a nested JSON format. The 'nutritions' column contains multiple subcolumns, so the data needs to be 'flattened' or normalized.
df2 = pd.json_normalize(results)
df2.iloc[0:2,:]
| name | id | family | order | genus | nutritions.calories | nutritions.fat | nutritions.sugar | nutritions.carbohydrates | nutritions.protein | |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Persimmon | 52 | Ebenaceae | Rosales | Diospyros | 81 | 0.0 | 18.0 | 18.0 | 0.0 |
| 1 | Strawberry | 3 | Rosaceae | Rosales | Fragaria | 29 | 0.4 | 5.4 | 5.5 | 0.8 |
Let's see if we can extract some information from this dataframe. Perhaps, we need to know the family and genus of a cherry.
# Keep only the rows whose name is exactly 'Cherry', then read the family and
# genus from the first matching row.
cherry = df2[df2["name"] == 'Cherry']
first_match = cherry.iloc[0]
first_match['family'], first_match['genus']
('Rosaceae', 'Prunus')
Module 5 Summary: APIs and Data Collection
Simple APIs in Python are application programming interfaces that provide straightforward and easy-to-use methods for interacting with services, libraries, or data, often with minimal configuration or complexity.
An API lets two pieces of software talk to each other.
Using an API library in Python entails importing the library, calling its functions or methods to make HTTP requests, and parsing the responses to access data or services provided by the API.
Pandas API processes the data by communicating with the other software components.
An Instance forms when you create a dictionary and then use the DataFrames constructor to create a Pandas object.
Method “head()” will display the mentioned number of rows from the top (default 5) of DataFrames, while method “mean()” will calculate the mean and return the values
Rest APIs allow you to communicate through the internet, taking advantage of resources like storage, access more data, AI algorithms, and so on.
HTTP methods transmit data over the internet.
An HTTP message typically includes a JSON file with instructions for operations.
HTTP messages containing JSON files are returned to the client as a response from web services.
Dealing with time series data involves using the Pandas time series function.
You can get data for daily candlesticks and plot the chart using Plotly with the candlestick plot.
The HTTP (HyperText Transfer Protocol) transfers data, including web pages and resources, between a client (a web browser) and a server on the World Wide Web.
The HTTP protocol is commonly used for implementing various types of REST APIs.
An HTTP response includes information like the type of resource, length of resource, and so on
Uniform resource locator (URL) is the most popular way to find resources on the web.
URL is divided into three parts: scheme, internet address or base URL, and route
The GET method is one of the popular methods of requesting information. Some other methods may also include the body.
Response method contains the version and body of the response.
POST submits data to the server, PUT updates data already on the server, DELETE deletes data from the server
Requests is a Python library that allows you to send HTTP/1.1 requests
You can modify the results of your query with the GET method.
You can obtain multiple requests from a URL like name, ID, and so on with a Query string.
Web scraping in Python involves extracting and parsing data from websites to gather information for various applications, using libraries like Beautiful Soup and requests.
HTML comprises text surrounded by blue text elements enclosed in angular brackets called tags.
You can select an HTML element on a web page to inspect the webpage.
Web pages may also contain CSS and JavaScript along with HTML elements.
Each HTML document is like an HTML Tree, which may contain strings and other tags.
Each HTML table is comprised of table tags and is structured with elements such as rows, headers, body and so on.
Tabular data can also be extracted from web pages using the read_html method in Pandas.
Beautiful Soup in Python is a library for parsing and navigating HTML and XML documents, making it easier to extract and manipulate data from web pages.
To parse a document, pass it through the Beautiful Soup constructor to get a beautiful soup object representing the document as a nested data structure.
Beautiful soup represents HTML as a set of tree-like objects with methods to parse the HTML.
Navigable string is like a Python string that supports beautiful soup functionality.
find_all is a method used to extract content based on the tag’s name, its attributes, the text of a string, or some combination of these.
The find_all method looks through a tag’s descendants and retrieves all descendants that match your filters.
The result is a Python iterable like a list.
File formats refer to the specific structure and encoding rules used to store and represent data in files, such as .txt for plain text or .csv for comma-separated values.
Python works with different file formats such as CSV, XML, JSON, xlsx, and so on
The extension of a file name will let you know what type of file it is and what it needs to open with.
To access data from CSV files, we can use Python libraries such as Pandas.
Similarly, different methods help parse JSON, XML, and other files.
Extra entries¶
# X is the 2x2 identity matrix, so the matrix product X·Y reproduces Y.
X = np.eye(2, dtype=int)
Y = np.array([[2, 1],
              [1, 2]])
Z = X @ Y  # for 2-D arrays the @ operator gives the same result as np.dot
Z
array([[2, 1],
[1, 2]])
Saving¶
# Export this notebook to HTML with the pretty-jupyter ("pj") template.
# IPython's `!command` shell escape does not raise when the command fails,
# so wrapping it in try/except can never reach the except branch. Running
# the same command through subprocess with check=True makes a failed
# conversion catchable.
import subprocess

try:
    subprocess.run(
        ["jupyter", "nbconvert", "P4DSNotes.ipynb", "--to", "html", "--template", "pj"],
        check=True,
        capture_output=True,
        text=True,
    )
except (OSError, subprocess.CalledProcessError) as e:
    print('HTML not stored')
    # CalledProcessError carries the command's stderr; an OSError (for
    # example, the jupyter executable was not found) only has its message.
    print(getattr(e, 'stderr', None) or e)
import shutil
import os
#file2=Tofld+'P4DSNotes.html'
# The line above copies files from A -> B
#shutil.copy(os.path.join(FromFld,fileh), Tofld)
# The line above copies all the content from A -> B
#shutil.copytree(FromFld, Tofld)
import shutil
# Move the freshly exported HTML notes from the working folder to the synced
# notes folder, replacing any copy that is already there.
FromFld='C:/Users/Gamaliel/Documents/G/ADD/IBM_DS/Python4DS/Mine/'
Tofld='C:/Users/Gamaliel/Documents/MEGAsync/BC/Conducta visual/Data_Science_Notes/'
fileh='P4DSNotes.html'
filep='P4DSNotes.pdf'
html_src = os.path.join(FromFld, fileh)
html_dst = os.path.join(Tofld, fileh)
if not os.path.isfile(html_src):
    # Check the source first: deleting the destination copy before knowing
    # that a replacement exists would lose the previous export.
    print('HTML not moved:', html_src, 'does not exist')
else:
    try:
        already_there = os.path.isfile(html_dst)
        if already_there:
            os.remove(html_dst)
            print(fileh, 'deleted in', Tofld)
        shutil.move(html_src, html_dst)
        print(fileh, 'replaced in' if already_there else 'written in', Tofld)
    except OSError as e:
        # os.remove and shutil.move report failures as OSError subclasses;
        # include the error so the cause is visible in the cell output.
        print('HTML not moved:', e)
P4DSNotes.html deleted in C:/Users/Gamaliel/Documents/MEGAsync/BC/Conducta visual/Data_Science_Notes/ P4DSNotes.html replaced in C:/Users/Gamaliel/Documents/MEGAsync/BC/Conducta visual/Data_Science_Notes/
#FromFld='C:/Users/Gamaliel/Documents/G/ADD/IBM_DS/Python4DS/IBMs-NBs/'
#os.chdir(FromFld)
#!jupyter execute HTMLs.ipynb --allow-errors